Rewrite setup.py. (#5271)

The setup.py is rewritten.  The new script uses only Python code and provides customized
implementations of setuptools commands.  This way users can run most setuptools commands
just as they would for any other Python library.

* Remove setup_pip.py
* Remove soft links.
* Define customized commands.
* Remove shell script.
* Remove makefile script.
* Update the doc for building from source.
This commit is contained in:
Jiaming Yuan
2020-02-04 13:35:42 +08:00
committed by GitHub
parent e4b74c4d22
commit 595a00466d
16 changed files with 502 additions and 358 deletions

View File

@@ -1,16 +1,11 @@
include *.md *.rst
include *.rst
include xgboost/VERSION
include xgboost/CMakeLists.txt
recursive-include xgboost *
recursive-include xgboost/include *
recursive-include xgboost/src *
recursive-include xgboost/make *
recursive-include xgboost/rabit *
recursive-include xgboost/lib *
recursive-include xgboost/dmlc-core *
# Exclude pre-compiled .o and .a files to avoid confusion.
# Make sure all .a files are removed to force compilation.
# Including the pre-compiled .so is needed as a placeholder,
# since it will be copied after compiling on the fly.
global-exclude *.o
global-exclude *.a
global-exclude *.pyo
global-exclude *.pyc
global-exclude *.py[oc]

View File

@@ -1,11 +0,0 @@
# This script prepares the PyPI installation package (sdist).
# Please don't use it for installing xgboost from GitHub.
# After executing `make pippack`, cd into xgboost-python and run this
# script; the sdist tar.gz is written to ./dist/.

# Abort on the first failing step so that a failed build or a failed copy
# of setup_pip.py can never be packaged into an sdist.
set -e

sh ./xgboost/build-python.sh
cp setup_pip.py setup.py
python setup.py sdist
# Make sure you know what you are doing before uncommenting the following line.
#python setup.py register upload

View File

@@ -1,68 +1,313 @@
# pylint: disable=invalid-name, exec-used
"""Setup xgboost package."""
from __future__ import absolute_import
import io
import sys
import os
from setuptools import setup, find_packages
import shutil
import subprocess
import logging
import distutils
import sys
from platform import system
from setuptools import setup, find_packages, Extension
from setuptools.command import build_ext, sdist, install_lib, install
# import subprocess
sys.path.insert(0, '.')
# You can't use `pip install .` as pip copies setup.py to a temporary
# directory, so the parent directory is no longer reachable (isolated build).
# CURRENT_DIR is kept absolute and is not reassigned afterwards: a relative
# value (an empty string when the script is run as `python setup.py`) would
# make every path derived from it depend on the current working directory.
CURRENT_DIR = os.path.abspath(os.path.dirname(__file__))
sys.path.insert(0, CURRENT_DIR)
# Build options shared by the custom commands below.
# Each key is the long command-line option name; each value is a tuple of
# (short option, help text, current value):
# - `Install.user_options` exposes (key, value[0], value[1]) as extra options
#   of the `install` command.
# - `Install.run` replaces value[2] with what was set on the command object.
# - `BuildExt.build` forwards every entry to CMake as `-D<KEY>=<value[2]>`,
#   where <KEY> is the key upper-cased with '-' replaced by '_'.
USER_OPTIONS = {
'use-openmp': (None, 'Build with OpenMP support.', 1),
'use-cuda': (None, 'Build with GPU acceleration.', 0),
'use-nccl': (None, 'Build with NCCL to enable distributed GPU support.', 0),
'build-with-shared-nccl': (None, 'Build with shared NCCL library.', 0),
'use-hdfs': (None, 'Build with HDFS support', 0),
'use-azure': (None, 'Build with AZURE support.', 0),
'use-s3': (None, 'Build with S3 support', 0),
'plugin-lz4': (None, 'Build lz4 plugin.', 0),
'plugin-dense-parser': (None, 'Build dense parser plugin.', 0)
}
# We can not import `xgboost.libpath` in setup.py directly since
# xgboost/__init__.py imports `xgboost.core`, which in turn imports `numpy`
# and `scipy`; those are only declared in `install_requires`.  That's why
# we're using `exec` here.
libpath_py = os.path.join(CURRENT_DIR, 'xgboost/libpath.py')
libpath = {'__file__': libpath_py}
# Read the file through a context manager so the handle is closed right away
# instead of being left for the garbage collector.
with open(libpath_py, "rb") as libpath_file:
    exec(compile(libpath_file.read(), libpath_py, 'exec'), libpath, libpath)

# Absolute paths of the directories / files copied by `copy_tree`; they are
# removed again by `clean_up`.
NEED_CLEAN_TREE = set()
NEED_CLEAN_FILE = set()
# Set by `BuildExt.build_cmake_extension` and read by `InstallLib.install`.
BUILD_TEMP_DIR = None

LIB_PATH = []
for libfile in libpath['find_lib_path']():
    try:
        relpath = os.path.relpath(libfile, CURRENT_DIR)
        LIB_PATH.append(relpath)
        break  # need only one
    except ValueError:
        continue
print("Install libxgboost from: %s" % LIB_PATH)
def lib_name():
    '''Return platform dependent shared object name.

    Returns
    -------
    name : str
        ``libxgboost.so`` on Linux and the BSDs, ``libxgboost.dylib`` on
        macOS and ``xgboost.dll`` on Windows.

    Raises
    ------
    RuntimeError
        If the current platform is none of the above.  Previously this case
        surfaced as an ``UnboundLocalError`` with no hint about the cause.
    '''
    platform_name = system()
    if platform_name == 'Linux' or platform_name.upper().endswith('BSD'):
        return 'libxgboost.so'
    if platform_name == 'Darwin':
        return 'libxgboost.dylib'
    if platform_name == 'Windows':
        return 'xgboost.dll'
    raise RuntimeError(
        'Unsupported platform for building XGBoost: %r' % platform_name)
# Please use setup_pip.py for generating and deploying the pip installation
# package; detailed instructions are in setup_pip.py.
# The metadata files are read through context managers so that their handles
# are closed deterministically.
with open(os.path.join(CURRENT_DIR, 'xgboost/VERSION')) as version_file:
    xgboost_version = version_file.read().strip()
with io.open(os.path.join(CURRENT_DIR, 'README.rst'),
             encoding='utf-8') as readme_file:
    xgboost_long_description = readme_file.read()

setup(name='xgboost',
      version=xgboost_version,
      description="XGBoost Python Package",
      long_description=xgboost_long_description,
      install_requires=[
          'numpy',
          'scipy',
      ],
      extras_require={
          'pandas': ['pandas'],
          # The extra keeps its historical name, but it must require
          # `scikit-learn`: the `sklearn` name on PyPI is deprecated.
          'sklearn': ['scikit-learn'],
          'dask': ['dask', 'pandas', 'distributed'],
          'datatable': ['datatable'],
          'plotting': ['graphviz', 'matplotlib']
      },
      maintainer='Hyunsu Cho',
      maintainer_email='chohyu01@cs.washington.edu',
      zip_safe=False,
      packages=find_packages(),
      # this will use MANIFEST.in during install where we specify additional files,
      # this is the golden line
      include_package_data=True,
      data_files=[('xgboost', LIB_PATH)],
      license='Apache-2.0',
      classifiers=['License :: OSI Approved :: Apache Software License',
                   'Development Status :: 5 - Production/Stable',
                   'Operating System :: OS Independent',
                   'Programming Language :: Python',
                   'Programming Language :: Python :: 3',
                   'Programming Language :: Python :: 3.5',
                   'Programming Language :: Python :: 3.6',
                   'Programming Language :: Python :: 3.7',
                   'Programming Language :: Python :: 3.8'],
      python_requires='>=3.5',
      url='https://github.com/dmlc/xgboost')
def copy_tree(src_dir, target_dir):
    '''Copy the sources needed for a build from `src_dir` into `target_dir`.

    Every copied directory and file is registered (by absolute path) in
    `NEED_CLEAN_TREE` / `NEED_CLEAN_FILE` so that `clean_up` can remove it.
    '''
    def tracked_tree_copy(origin, destination):
        distutils.dir_util.copy_tree(origin, destination)
        NEED_CLEAN_TREE.add(os.path.abspath(destination))

    def tracked_file_copy(origin, destination):
        distutils.file_util.copy_file(origin, destination)
        NEED_CLEAN_FILE.add(os.path.abspath(destination))

    # Directories first, in the same order as they are listed here.
    for sub_dir in ('src', 'include', 'dmlc-core', 'rabit', 'cmake', 'plugin'):
        tracked_tree_copy(os.path.join(src_dir, sub_dir),
                          os.path.join(target_dir, sub_dir))

    # Then the individual top-level files.
    for file_name in ('CMakeLists.txt', 'LICENSE'):
        tracked_file_copy(os.path.join(src_dir, file_name),
                          os.path.join(target_dir, file_name))
def clean_up():
    '''Remove every directory and file registered by `copy_tree`.

    Directories listed in `NEED_CLEAN_TREE` are removed first, then the files
    listed in `NEED_CLEAN_FILE`.
    '''
    removal_plan = (
        (shutil.rmtree, NEED_CLEAN_TREE),
        (os.remove, NEED_CLEAN_FILE),
    )
    for remove, registered_paths in removal_plan:
        for registered in registered_paths:
            remove(registered)
class CMakeExtension(Extension):  # pylint: disable=too-few-public-methods
    '''Extension without any source files.

    `BuildExt.build_extension` checks for this type to dispatch the build to
    CMake.
    '''
    def __init__(self, name):
        Extension.__init__(self, name=name, sources=[])
class BuildExt(build_ext.build_ext):
    '''Custom build_ext command using CMake.'''

    logger = logging.getLogger('XGBoost build_ext')

    # pylint: disable=too-many-arguments,no-self-use
    def build(self, src_dir, build_dir, generator, build_tool=None, use_omp=1):
        '''Configure and build the core library with CMake.

        Parameters
        ----------
        src_dir : str
            Source directory handed to CMake (resolved relative to
            `build_dir`, which is used as the working directory).
        build_dir : str
            Directory in which CMake and the build tool are executed.
        generator : str
            Complete CMake generator argument, e.g. ``-GNinja``.
        build_tool : str, optional
            Build tool executable (``ninja`` or ``make``); not used on
            Windows, where the build is driven through CMake.
        use_omp : int
            When 0, OpenMP is forced off regardless of `USER_OPTIONS`.

        Raises
        ------
        subprocess.CalledProcessError
            If the configure or the build step fails.
        '''
        cmake_cmd = ['cmake', src_dir, generator]

        for k, v in USER_OPTIONS.items():
            arg = k.replace('-', '_').upper()
            if arg == 'USE_OPENMP' and use_omp == 0:
                # Fallback build: override the user option so that OpenMP is
                # really disabled.  The comparison must use the upper-cased
                # CMake name and happen before the flag is appended.
                value = '0'
            else:
                value = str(v[2])
            cmake_cmd.append('-D' + arg + '=' + value)

        # Configure step.
        subprocess.check_call(cmake_cmd, cwd=build_dir)

        # Build step.
        if system() == 'Windows':
            # `--build` selects CMake's build-tool mode and has to be a
            # separate invocation; it can not be part of the configure call.
            subprocess.check_call(
                ['cmake', '--build', '.', '--config', 'Release'],
                cwd=build_dir)
        else:
            # os.cpu_count() may return None when the count is undetermined.
            nproc = os.cpu_count() or 1
            subprocess.check_call([build_tool, '-j' + str(nproc)],
                                  cwd=build_dir)

    def build_cmake_extension(self):
        '''Configure and build using CMake'''
        src_dir = 'xgboost'
        try:
            # Building inside the source tree: the C++ sources live in the
            # parent directory of the Python package.
            copy_tree(os.path.join(CURRENT_DIR, os.path.pardir),
                      os.path.join(self.build_temp, src_dir))
        except Exception:  # pylint: disable=broad-except
            # Otherwise fall back to a local `xgboost` directory (presumably
            # the layout produced by the `sdist` command).
            copy_tree(src_dir, os.path.join(self.build_temp, src_dir))
        build_dir = self.build_temp
        global BUILD_TEMP_DIR  # pylint: disable=global-statement
        # Remember the location so that `InstallLib` can find the result.
        BUILD_TEMP_DIR = build_dir
        libxgboost = os.path.abspath(
            os.path.join(CURRENT_DIR, os.path.pardir, 'lib', lib_name()))

        if os.path.exists(libxgboost):
            self.logger.info('Found shared library, skipping build.')
            return

        self.logger.info('Building from source. %s', libxgboost)
        os.makedirs(build_dir, exist_ok=True)
        if shutil.which('ninja'):
            build_tool = 'ninja'
        else:
            build_tool = 'make'

        if system() == 'Windows':
            # Pick up from LGB, just test every possible tool chain.
            for vs in ('-GVisual Studio 16 2019', '-GVisual Studio 15 2017',
                       '-GVisual Studio 14 2015', '-GMinGW Makefiles'):
                try:
                    self.build(src_dir, build_dir, vs)
                    self.logger.info(
                        '%s is used for building Windows distribution.', vs)
                    break
                except subprocess.CalledProcessError:
                    continue
        else:
            gen = '-GNinja' if build_tool == 'ninja' else '-GUnix Makefiles'
            try:
                self.build(src_dir, build_dir, gen, build_tool, use_omp=1)
            except subprocess.CalledProcessError:
                self.logger.warning('Disabling OpenMP support.')
                self.build(src_dir, build_dir, gen, build_tool, use_omp=0)

    def build_extension(self, ext):
        '''Override the method for dispatching.'''
        if isinstance(ext, CMakeExtension):
            self.build_cmake_extension()
        else:
            super().build_extension(ext)

    def copy_extensions_to_source(self):
        '''Dummy override.  Invoked during editable installation.  Our binary
        should be available in `lib`.

        Raises
        ------
        ValueError
            If the shared library has not been built into `lib` yet.
        '''
        if not os.path.exists(
                os.path.join(CURRENT_DIR, os.path.pardir, 'lib', lib_name())):
            raise ValueError('For using editable installation, please ' +
                             'build the shared object first with CMake.')
class Sdist(sdist.sdist):  # pylint: disable=too-many-ancestors
    '''Source distribution command that first copies the C++ sources into the
    Python package directory.
    '''
    logger = logging.getLogger('xgboost sdist')

    def run(self):
        '''Copy the sources, drop any pre-built shared library, then run the
        regular `sdist` command.
        '''
        project_root = os.path.join(CURRENT_DIR, os.path.pardir)
        copy_tree(project_root, os.path.join(CURRENT_DIR, 'xgboost'))

        shared_object = os.path.join(project_root, 'lib', lib_name())
        if os.path.exists(shared_object):
            self.logger.warning(
                'Found shared library, removing to avoid being included in source distribution.'
            )
            os.remove(shared_object)

        super().run()
class InstallLib(install_lib.install_lib):
    '''Copy shared object into installation directory.'''
    logger = logging.getLogger('xgboost install_lib')

    def install(self):
        '''Install the Python modules, then copy the shared library into
        `<install_dir>/xgboost/lib`.

        Returns
        -------
        outfiles : list
            Files reported by the base command plus the copied library.
        '''
        outfiles = super().install()

        lib_dir = os.path.join(self.install_dir, 'xgboost', 'lib')
        os.makedirs(lib_dir, exist_ok=True)
        libxgboost_path = lib_name()
        dst = os.path.join(lib_dir, libxgboost_path)

        dft_lib_dir = os.path.join(CURRENT_DIR, os.path.pardir, 'lib')
        if os.path.exists(os.path.join(dft_lib_dir, libxgboost_path)):
            # The library is built by CMake directly
            src = os.path.join(dft_lib_dir, libxgboost_path)
        else:
            # The library is built by setup.py.  The build directory is only
            # resolved in this branch; BUILD_TEMP_DIR is still None when
            # `build_ext` did not run in this process, in which case the
            # directory is taken from the finalized `build_ext` command.
            build_temp = BUILD_TEMP_DIR
            if build_temp is None:
                build_temp = self.get_finalized_command('build_ext').build_temp
            src = os.path.join(build_temp, 'xgboost', 'lib', libxgboost_path)
        self.logger.info('Installing shared library: %s', src)
        dst, _ = self.copy_file(src, dst)
        outfiles.append(dst)
        return outfiles
class Install(install.install):  # pylint: disable=too-many-instance-attributes
    '''`install` command extended with the XGBoost specific options listed in
    `USER_OPTIONS`.
    '''
    user_options = install.install.user_options + [
        (option, spec[0], spec[1]) for option, spec in USER_OPTIONS.items()]

    def initialize_options(self):
        '''Set the default value of every XGBoost specific option.'''
        super().initialize_options()
        # OpenMP is the only option that is enabled by default.
        self.use_openmp = 1
        self.use_cuda = self.use_nccl = self.build_with_shared_nccl = 0
        self.use_hdfs = self.use_azure = self.use_s3 = 0
        self.plugin_lz4 = self.plugin_dense_parser = 0

    def run(self):
        '''Store the option values in `USER_OPTIONS`, then run the regular
        `install` command.
        '''
        for option in USER_OPTIONS:
            attribute = option.replace('-', '_')
            if not hasattr(self, attribute):
                continue
            short_name, description = USER_OPTIONS[option][:2]
            USER_OPTIONS[option] = (
                short_name, description, getattr(self, attribute))
        super().run()
if __name__ == '__main__':
    # Supported commands:
    # From internet:
    # - pip install xgboost
    # - pip install --no-binary :all: xgboost

    # From source tree `xgboost/python-package`:
    # - python setup.py build
    # - python setup.py build_ext
    # - python setup.py install
    # - python setup.py sdist       && pip install <sdist-name>
    # - python setup.py bdist_wheel && pip install <wheel-name>

    # When XGBoost is compiled directly with CMake:
    # - pip install -e .
    # - python setup.py develop   # same as above
    logging.basicConfig(level=logging.INFO)

    # Read the metadata files through context managers so that their handles
    # are closed deterministically.
    with open(os.path.join(CURRENT_DIR, 'xgboost/VERSION')) as version_file:
        xgboost_version = version_file.read().strip()
    with open(os.path.join(CURRENT_DIR, 'README.rst'),
              encoding='utf-8') as readme_file:
        xgboost_long_description = readme_file.read()

    try:
        setup(name='xgboost',
              version=xgboost_version,
              description="XGBoost Python Package",
              long_description=xgboost_long_description,
              install_requires=[
                  'numpy',
                  'scipy',
              ],
              ext_modules=[CMakeExtension('libxgboost')],
              cmdclass={
                  'build_ext': BuildExt,
                  'sdist': Sdist,
                  'install_lib': InstallLib,
                  'install': Install
              },
              extras_require={
                  'pandas': ['pandas'],
                  # The extra keeps its historical name, but it must require
                  # `scikit-learn`: the `sklearn` name on PyPI is deprecated.
                  'sklearn': ['scikit-learn'],
                  'dask': ['dask', 'pandas', 'distributed'],
                  'datatable': ['datatable'],
                  'plotting': ['graphviz', 'matplotlib']
              },
              maintainer='Hyunsu Cho',
              maintainer_email='chohyu01@cs.washington.edu',
              zip_safe=False,
              packages=find_packages(),
              include_package_data=True,
              license='Apache-2.0',
              classifiers=['License :: OSI Approved :: Apache Software License',
                           'Development Status :: 5 - Production/Stable',
                           'Operating System :: OS Independent',
                           'Programming Language :: Python',
                           'Programming Language :: Python :: 3',
                           'Programming Language :: Python :: 3.5',
                           'Programming Language :: Python :: 3.6',
                           'Programming Language :: Python :: 3.7',
                           'Programming Language :: Python :: 3.8'],
              python_requires='>=3.5',
              url='https://github.com/dmlc/xgboost')
    finally:
        # Always remove the copied sources, also when a command fails;
        # otherwise a failed `sdist` leaves the C++ tree inside the package.
        clean_up()

View File

@@ -1,85 +0,0 @@
# pylint: disable=invalid-name, exec-used, no-self-use, missing-docstring
"""Setup xgboost package."""
from __future__ import absolute_import
import sys
import os
from setuptools import setup, find_packages, Distribution
# import subprocess
sys.path.insert(0, '.')
# This script packs and ships the pip installation package.
# It builds the xgboost code on the fly and packs it for pip.
# Please don't use this file for installing from GitHub.
if os.name == 'nt':
    # Windows is not handled by this script: print a hint and stop.
    print('Windows users please use github installation.')
    sys.exit()

# Not Windows: compile the library first, unless only an sdist is requested.
if not (len(sys.argv) >= 2 and sys.argv[1] == 'sdist'):
    os.system('sh ./xgboost/build-python.sh')

CURRENT_DIR = os.path.dirname(__file__)
class BinaryDistribution(Distribution):
    """Auxiliary distribution class telling setuptools that this is a
    non-generic, platform-specific package."""

    def has_ext_modules(self):
        """Always report that the distribution has extension modules."""
        return True
# We can not import `xgboost.libpath` in setup.py directly since
# xgboost/__init__.py imports `xgboost.core`, which in turn imports `numpy`
# and `scipy`; those are only declared in `install_requires`.  That's why
# we're using `exec` here.
# Do not load libpath for sdist.
if len(sys.argv) < 2 or sys.argv[1] != 'sdist':
    libpath_py = os.path.join(CURRENT_DIR, 'xgboost/libpath.py')
    libpath = {'__file__': libpath_py}
    # Read the file through a context manager so the handle is closed right
    # away instead of being left for the garbage collector.
    with open(libpath_py, "rb") as libpath_file:
        exec(compile(libpath_file.read(), libpath_py, 'exec'), libpath, libpath)
    LIB_PATH = libpath['find_lib_path']()
# Read the version through a context manager so the handle is closed
# deterministically.
with open(os.path.join(CURRENT_DIR, 'xgboost/VERSION')) as version_file:
    xgboost_version = version_file.read().strip()

setup(name='xgboost',
      version=xgboost_version,
      description='XGBoost Python Package',
      install_requires=[
          'numpy',
          'scipy',
      ],
      extras_require={
          'pandas': ['pandas'],
          # The extra keeps its historical name, but it must require
          # `scikit-learn`: the `sklearn` name on PyPI is deprecated.
          'sklearn': ['scikit-learn'],
          'dask': ['dask', 'pandas', 'distributed'],
          'datatable': ['datatable'],
          'plotting': ['graphviz', 'matplotlib']
      },
      maintainer='Hyunsu Cho',
      maintainer_email='chohyu01@cs.washington.edu',
      zip_safe=False,
      packages=find_packages(),
      # don't need this and don't use this, give everything to MANIFEST.in
      # package_dir = {'':'xgboost'},
      # package_data = {'': ['*.txt','*.md','*.sh'],
      #               }
      # this will use MANIFEST.in during install where we specify additional files,
      # this is the golden line
      include_package_data=True,
      # !!! don't use data_files for creating pip installation,
      # otherwise install_data process will copy it to
      # root directory for some machines, and cause confusions on building
      # data_files=[('xgboost', LIB_PATH)],
      distclass=BinaryDistribution,
      license='Apache-2.0',
      classifiers=['License :: OSI Approved :: Apache Software License',
                   'Development Status :: 5 - Production/Stable',
                   'Operating System :: OS Independent',
                   'Programming Language :: Python',
                   'Programming Language :: Python :: 3',
                   'Programming Language :: Python :: 3.5',
                   'Programming Language :: Python :: 3.6',
                   'Programming Language :: Python :: 3.7',
                   'Programming Language :: Python :: 3.8'],
      python_requires='>=3.5',
      url='https://github.com/dmlc/xgboost')

View File

@@ -1,34 +0,0 @@
#!/bin/sh
# This is a simple script to build xgboost on Mac and Linux for the Python wrapper only.
# It first tries to build with OpenMP; if that fails, it disables OpenMP and builds again.
# This automatically builds xgboost for Mac users who don't have OpenMP support.
# In most cases, typing make will give you what you want.
# See additional instructions in doc/build.md

# Note: this script is meant for the Python package only, and it might have some
# filename conflicts with build.sh, which is for everything.

set -e
set -x

# Remember where we started so we can return there at the end.
oldpath=$(pwd)
cd ./xgboost/

# Build out-of-source in ./build to trigger on-the-fly compilation.
mkdir -p build
cd build
if cmake .. && make -j4; then
    echo "Successfully built multi-thread xgboost"
else
    echo "-----------------------------"
    echo "Building multi-thread xgboost failed"
    echo "Start to build single-thread xgboost"
    cmake .. -DUSE_OPENMP=0
    make -j4
    echo "Successfully built single-thread xgboost; training speed may be suboptimal."
    echo "To use all CPU cores for training jobs, install libomp package from Homebrew and re-install XGBoost"
fi
# Quoted so that a starting directory containing whitespace still works.
cd "$oldpath"
set +x

View File

@@ -1 +0,0 @@
../../dmlc-core

View File

@@ -1 +0,0 @@
../../include

View File

@@ -1 +0,0 @@
../../lib

View File

@@ -19,21 +19,27 @@ def find_lib_path():
List of all found library path to xgboost
"""
curr_path = os.path.dirname(os.path.abspath(os.path.expanduser(__file__)))
# make pythonpack hack: copy this directory one level upper for setup.py
dll_path = [curr_path, os.path.join(curr_path, '../../lib/'),
os.path.join(curr_path, './lib/'),
os.path.join(sys.prefix, 'xgboost')]
dll_path = [
# normal, after installation `lib` is copied into Python package tree.
os.path.join(curr_path, 'lib'),
# editable installation, no copying is performed.
os.path.join(curr_path, os.path.pardir, os.path.pardir, 'lib'),
]
if sys.platform == 'win32':
if platform.architecture()[0] == '64bit':
dll_path.append(os.path.join(curr_path, '../../windows/x64/Release/'))
# hack for pip installation when copy all parent source directory here
dll_path.append(
os.path.join(curr_path, '../../windows/x64/Release/'))
# hack for pip installation when copy all parent source
# directory here
dll_path.append(os.path.join(curr_path, './windows/x64/Release/'))
else:
dll_path.append(os.path.join(curr_path, '../../windows/Release/'))
# hack for pip installation when copy all parent source directory here
# hack for pip installation when copy all parent source
# directory here
dll_path.append(os.path.join(curr_path, './windows/Release/'))
dll_path = [os.path.join(p, 'xgboost.dll') for p in dll_path]
elif sys.platform.startswith('linux') or sys.platform.startswith('freebsd'):
elif sys.platform.startswith('linux') or sys.platform.startswith(
'freebsd'):
dll_path = [os.path.join(p, 'libxgboost.so') for p in dll_path]
elif sys.platform == 'darwin':
dll_path = [os.path.join(p, 'libxgboost.dylib') for p in dll_path]
@@ -42,10 +48,13 @@ def find_lib_path():
lib_path = [p for p in dll_path if os.path.exists(p) and os.path.isfile(p)]
# From github issues, most of installation errors come from machines w/o compilers
# XGBOOST_BUILD_DOC is defined by sphinx conf.
if not lib_path and not os.environ.get('XGBOOST_BUILD_DOC', False):
raise XGBoostLibraryNotFound(
'Cannot find XGBoost Library in the candidate path, ' +
'did you install compilers and run build.sh in root path?\n'
'List of candidates:\n' + ('\n'.join(dll_path)))
link = 'https://xgboost.readthedocs.io/en/latest/build.html'
msg = 'Cannot find XGBoost Library in the candidate path. ' + \
'List of candidates:\n- ' + ('\n- '.join(dll_path)) + \
'\nXGBoost Python package path: ' + curr_path + \
'\nsys.prefix: ' + sys.prefix + \
'\nSee: ' + link + ' for installing XGBoost.'
raise XGBoostLibraryNotFound(msg)
return lib_path

View File

@@ -1 +0,0 @@
../../make

View File

@@ -1 +0,0 @@
../../rabit

View File

@@ -1 +0,0 @@
../../src