diff --git a/Makefile b/Makefile index fe4759adc..80a3e30be 100644 --- a/Makefile +++ b/Makefile @@ -207,33 +207,12 @@ doxygen: # create standalone python tar file. pypack: ${XGBOOST_DYLIB} - cp ${XGBOOST_DYLIB} python-package/xgboost + cp ${XGBOOST_DYLIB} python-package/xgboost/lib cd python-package; tar cf xgboost.tar xgboost; cd .. # create pip source dist (sdist) pack for PyPI pippack: clean_all - rm -rf xgboost-python -# remove symlinked directories in python-package/xgboost - rm -rf python-package/xgboost/lib - rm -rf python-package/xgboost/dmlc-core - rm -rf python-package/xgboost/include - rm -rf python-package/xgboost/make - rm -rf python-package/xgboost/rabit - rm -rf python-package/xgboost/src - cp -r python-package xgboost-python - cp -r CMakeLists.txt xgboost-python/xgboost/ - cp -r cmake xgboost-python/xgboost/ - cp -r plugin xgboost-python/xgboost/ - cp -r make xgboost-python/xgboost/ - cp -r src xgboost-python/xgboost/ - cp -r tests xgboost-python/xgboost/ - cp -r include xgboost-python/xgboost/ - cp -r dmlc-core xgboost-python/xgboost/ - cp -r rabit xgboost-python/xgboost/ -# Use setup_pip.py instead of setup.py - mv xgboost-python/setup_pip.py xgboost-python/setup.py -# Build sdist tarball - cd xgboost-python; python setup.py sdist; mv dist/*.tar.gz ..; cd .. + cd python-package; python setup.py sdist; mv dist/*.tar.gz ..; cd .. # Script to make a clean installable R package. 
Rpack: clean_all @@ -254,9 +233,9 @@ Rpack: clean_all cp -r dmlc-core/include xgboost/src/dmlc-core/include cp -r dmlc-core/src xgboost/src/dmlc-core/src cp ./LICENSE xgboost - # Modify PKGROOT in Makevars.in +# Modify PKGROOT in Makevars.in cat R-package/src/Makevars.in|sed '2s/.*/PKGROOT=./' > xgboost/src/Makevars.in - # Configure Makevars.win (Windows-specific Makevars, likely using MinGW) +# Configure Makevars.win (Windows-specific Makevars, likely using MinGW) cp xgboost/src/Makevars.in xgboost/src/Makevars.win cat xgboost/src/Makevars.in| sed '3s/.*/ENABLE_STD_THREAD=0/' > xgboost/src/Makevars.win sed -i -e 's/@OPENMP_CXXFLAGS@/$$\(SHLIB_OPENMP_CXXFLAGS\)/g' xgboost/src/Makevars.win diff --git a/doc/build.rst b/doc/build.rst index 1bb6b78c9..636bfc02b 100644 --- a/doc/build.rst +++ b/doc/build.rst @@ -15,16 +15,20 @@ Installation Guide * The binary wheel will support GPU algorithms (`gpu_hist`) on machines with NVIDIA GPUs. Please note that **training with multiple GPUs is only supported for Linux platform**. See :doc:`gpu/index`. * Currently, we provide binary wheels for 64-bit Linux and Windows. - * Nightly builds are available. You can now run *pip install https://s3-us-west-2.amazonaws.com/xgboost-nightly-builds/xgboost-[version]+[commit hash]-py2.py3-none-manylinux1_x86_64.whl* to install the nightly build with the given commit hash. See `this page `_ to see the list of all nightly builds. + * Nightly builds are available. You can now run + + .. code-block:: bash + + pip install https://s3-us-west-2.amazonaws.com/xgboost-nightly-builds/xgboost-[version]+[commithash]-py2.py3-none-manylinux1_x86_64.whl + + to install the nightly build with the given commit hash. See `this page + `_ to see the + list of all nightly builds. **************************** Building XGBoost from source **************************** -This page gives instructions on how to build and install XGBoost from scratch on various systems. It consists of two steps: - -1. 
First build the shared library from the C++ codes (``libxgboost.so`` for Linux/OSX and ``xgboost.dll`` for Windows). - (For R-package installation, please directly refer to `R Package Installation`_.) -2. Then install the language packages (e.g. Python Package). +This page gives instructions on how to build and install XGBoost from scratch on various systems. .. note:: Use of Git submodules @@ -49,11 +53,10 @@ to ask questions at `the user forum `_. * `Building the Shared Library`_ - - `Building on Ubuntu/Debian`_ + - `Building on Linux Distributions`_ - `Building on OSX`_ - `Building on Windows`_ - `Building with GPU support`_ - - `Customized Building`_ * `Python Package Installation`_ * `R Package Installation`_ @@ -71,15 +74,17 @@ Our goal is to build the shared library: - On Linux/OSX the target library is ``libxgboost.so`` - On Windows the target library is ``xgboost.dll`` -The minimal building requirement is +This shared library is used by different language bindings (with some additions depending +on the binding you choose). For building language specific package, see corresponding +sections in this document. The minimal building requirement is - A recent C++ compiler supporting C++11 (g++-5.0 or higher) -- CMake 3.3 or higher (3.12 for building with CUDA) +- CMake 3.12 or higher. -For a list of CMake options, see ``#-- Options`` in CMakeLists.txt on top of source tree. +For a list of CMake options, see ``#-- Options`` in CMakeLists.txt on top level of source tree. -Building on Ubuntu/Debian -========================= +Building on Linux distributions +=============================== On Ubuntu, one builds XGBoost by running CMake: @@ -90,7 +95,7 @@ On Ubuntu, one builds XGBoost by running CMake: mkdir build cd build cmake .. 
- make -j4 + make -j$(nproc) Building on OSX =============== @@ -241,56 +246,101 @@ The above cmake configuration run will create an ``xgboost.sln`` solution file i To speed up compilation, run multiple jobs in parallel by appending option ``-- /MP``. -Customized Building -=================== +Makefiles +========= -We recommend the use of CMake for most use cases. See the full range of building options in CMakeLists.txt. - -Alternatively, you may use Makefile. The Makefile uses a configuration file ``config.mk``, which lets you modify several compilation flags: -- Whether to enable support for various distributed filesystems such as HDFS and Amazon S3 -- Which compiler to use -- And some more - -To customize, first copy ``make/config.mk`` to the project root and then modify the copy. +It's only used for submitting R CRAN package and creating shorthands for running linters, +performing packaging tasks etc. So the remaining makefiles are legacy. Python Package Installation =========================== -The Python package is located at ``python-package/``. -There are several ways to install the package: +The Python package is located at ``python-package/``. There are several ways to build and +install the package from source: -1. Install system-wide, which requires root permission: +1. Use Python setuptools directly + +The XGBoost Python package supports most of the setuptools commands, here is a list of tested commands: .. code-block:: bash - cd python-package; sudo python setup.py install + python setup.py install # Install the XGBoost to your current Python environment. + python setup.py build # Build the Python package. + python setup.py build_ext # Build only the C++ core. + python setup.py sdist # Create a source distribution + python setup.py bdist # Create a binary distribution + python setup.py bdist_wheel # Create a binary distribution with wheel format -You will however need Python ``distutils`` module for this to -work. 
It is often part of the core Python package or it can be installed using your -package manager, e.g. in Debian use +Running ``python setup.py install`` will compile XGBoost using default CMake flags. For +passing additional compilation options, append the flags to the command. For example, to +enable CUDA acceleration and NCCL (distributed GPU) support: + +.. code-block:: bash + + python setup.py install --use-cuda --use-nccl + +Please refer to ``setup.py`` for a complete list of available options. Some other options +used for development are only available for using CMake directly. See next section on +how to use CMake with setuptools manually. + +You can install the created distribution packages using pip. For example, after running +``sdist`` setuptools command, a tar ball similar to ``xgboost-1.0.0.tar.gz`` will be +created under the ``dist`` directory. Then you can install it by invoking the following +command under ``dist`` directory: + +.. code-block:: bash + + # under python-package directory + cd dist + pip install ./xgboost-1.0.0.tar.gz + + +For details about these commands, please refer to the official document of `setuptools +`_, or just Google "how to install Python +package from source". XGBoost Python package follows the general convention. Setuptools +is usually available with your Python distribution, if not you can install it via system +command. For example on Debian or Ubuntu: .. code-block:: bash sudo apt-get install python-setuptools -.. note:: Re-compiling XGBoost - If you recompiled XGBoost, then you need to reinstall it again to make the new library take effect. +For cleaning up the directory after running above commands, ``python setup.py clean`` is +not sufficient. After copying out the build result, simply running ``git clean -xdf`` +under ``python-package`` is an efficient way to remove generated cache files. If you find +weird behaviors in Python build or running linter, it might be caused by those cached +files. -2.
Only set the environment variable ``PYTHONPATH`` to tell Python where to find - the library. For example, assume we cloned ``xgboost`` on the home directory - ``~``. then we can added the following line in ``~/.bashrc``. - This option is **recommended for developers** who change the code frequently. The changes will be immediately reflected once you pulled the code and rebuild the project (no need to call ``setup`` again). +For using develop command (editable installation), see next section. + +.. code-block:: + + python setup.py develop # Create an editable installation. + pip install -e . # Same as above, but carried out by pip. + + +2. Build C++ core with CMake first + +This is mostly for C++ developers who don't want to go through the hooks in Python +setuptools. You can build C++ library directly using CMake as described in above +sections. After compilation, a shared object (or called dynamic linked library, jargon +depending on your platform) will appear in XGBoost's source tree under ``lib/`` directory. +On Linux distributions it's ``lib/libxgboost.so``. From there all Python setuptools +commands will reuse that shared object instead of compiling it again. This is especially +convenient if you are using the editable installation, where the installed package is +simply a link to the source tree. We can perform rapid testing during development. Here +is a simple bash script that does that: .. code-block:: bash - export PYTHONPATH=~/xgboost/python-package - -3. Install only for the current user. - -.. code-block:: bash - - cd python-package; python setup.py develop --user + # Under xgboost source tree. + mkdir build + cd build + cmake .. + make -j$(nproc) + cd ../python-package + pip install -e . # or equivalently python setup.py develop .. _mingw_python: @@ -310,6 +360,7 @@ So you may want to build XGBoost with GCC own your own risk. This presents some 4. Don't use ``-march=native`` gcc flag.
Using it causes the Python interpreter to crash if the DLL was actually used. 5. You may need to provide the lib with the runtime libs. If ``mingw32/bin`` is not in ``PATH``, build a wheel (``python setup.py bdist_wheel``), open it with an archiver and put the needed dlls to the directory where ``xgboost.dll`` is situated. Then you can install the wheel with ``pip``. + R Package Installation ====================== @@ -335,8 +386,9 @@ You can install XGBoost from CRAN just like any other R package: Installing the development version ---------------------------------- -Make sure you have installed git and a recent C++ compiler supporting C++11 (e.g., g++-4.8 or higher). -On Windows, Rtools must be installed, and its bin directory has to be added to ``PATH`` during the installation. +Make sure you have installed git and a recent C++ compiler supporting C++11 (See above +sections for requirements of building C++ core). On Windows, Rtools must be installed, +and its bin directory has to be added to ``PATH`` during the installation. Due to the use of git-submodules, ``devtools::install_github`` can no longer be used to install the latest version of R package. Thus, one has to run git to check out the code first: @@ -350,10 +402,11 @@ Thus, one has to run git to check out the code first: mkdir build cd build cmake .. -DR_LIB=ON - make -j4 + make -j$(nproc) make install -If all fails, try `Building the shared library`_ to see whether a problem is specific to R package or not. +If all fails, try `Building the shared library`_ to see whether a problem is specific to R +package or not. Notice that the R package is installed by CMake directly. .. _r_gpu_support: @@ -369,7 +422,7 @@ On Linux, starting from the XGBoost directory type: mkdir build cd build cmake .. -DUSE_CUDA=ON -DR_LIB=ON - make install -j + make install -j$(nproc) When default target is used, an R package shared library would be built in the ``build`` area. 
The ``install`` target, in addition, assembles the package files with this shared library under ``build/R-package`` and runs ``R CMD INSTALL``. @@ -407,29 +460,6 @@ Trouble Shooting git submodule update && make clean_all && make -j4 -2. Compile failed after ``config.mk`` is modified - - Need to clean all first: - - .. code-block:: bash - - make clean_all && make -j4 - -3. ``Makefile: dmlc-core/make/dmlc.mk: No such file or directory`` - - We need to recursively clone the submodule: - - .. code-block:: bash - - git submodule init - git submodule update - - Alternatively, do another clone - - .. code-block:: bash - - git clone https://github.com/dmlc/xgboost --recursive - Building the Documentation ========================== @@ -447,5 +477,8 @@ XGBoost uses `Sphinx `_ for documentation - guzzle_sphinx_theme - recommonmark - mock + - sh + - graphviz + - matplotlib Under ``xgboost/doc`` directory, run ``make `` with ```` replaced by the format you want. For a list of supported formats, run ``make help`` under the same directory. 
diff --git a/doc/conf.py b/doc/conf.py index 8a40da042..f07678f38 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -18,26 +18,40 @@ from urllib.error import HTTPError from recommonmark.parser import CommonMarkParser import sys import re -import os, subprocess -import shlex +import os +import subprocess import guzzle_sphinx_theme git_branch = os.getenv('SPHINX_GIT_BRANCH', default=None) if git_branch is None: - # If SPHINX_GIT_BRANCH environment variable is not given, run git to determine branch name - git_branch = [re.sub(r'origin/', '', x.lstrip(' ')) for x in str(git.branch('-r', '--contains', 'HEAD')).rstrip('\n').split('\n')] + # If SPHINX_GIT_BRANCH environment variable is not given, run git + # to determine branch name + git_branch = [ + re.sub(r'origin/', '', x.lstrip(' ')) for x in str( + git.branch('-r', '--contains', 'HEAD')).rstrip('\n').split('\n') + ] git_branch = [x for x in git_branch if 'HEAD' not in x] print('git_branch = {}'.format(git_branch[0])) try: - filename, _ = urllib.request.urlretrieve('https://s3-us-west-2.amazonaws.com/xgboost-docs/{}.tar.bz2'.format(git_branch[0])) - call('if [ -d tmp ]; then rm -rf tmp; fi; mkdir -p tmp/jvm; cd tmp/jvm; tar xvf {}'.format(filename), shell=True) + filename, _ = urllib.request.urlretrieve( + 'https://s3-us-west-2.amazonaws.com/xgboost-docs/{}.tar.bz2'.format( + git_branch[0])) + call( + 'if [ -d tmp ]; then rm -rf tmp; fi; mkdir -p tmp/jvm; cd tmp/jvm; tar xvf {}' + .format(filename), + shell=True) except HTTPError: - print('JVM doc not found. Skipping...') + print('JVM doc not found. Skipping...') try: - filename, _ = urllib.request.urlretrieve('https://s3-us-west-2.amazonaws.com/xgboost-docs/doxygen/{}.tar.bz2'.format(git_branch[0])) - call('mkdir -p tmp/dev; cd tmp/dev; tar xvf {}; mv doc_doxygen/html/* .; rm -rf doc_doxygen'.format(filename), shell=True) + filename, _ = urllib.request.urlretrieve( + 'https://s3-us-west-2.amazonaws.com/xgboost-docs/doxygen/{}.tar.bz2'. 
+ format(git_branch[0])) + call( + 'mkdir -p tmp/dev; cd tmp/dev; tar xvf {}; mv doc_doxygen/html/* .; rm -rf doc_doxygen' + .format(filename), + shell=True) except HTTPError: - print('C API doc not found. Skipping...') + print('C API doc not found. Skipping...') # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the @@ -48,22 +62,22 @@ sys.path.insert(0, libpath) sys.path.insert(0, curr_path) # -- mock out modules -import mock +import mock # NOQA MOCK_MODULES = ['scipy', 'scipy.sparse', 'sklearn', 'pandas'] for mod_name in MOCK_MODULES: - sys.modules[mod_name] = mock.Mock() + sys.modules[mod_name] = mock.Mock() # -- General configuration ------------------------------------------------ # General information about the project. project = u'xgboost' author = u'%s developers' % project -copyright = u'2019, %s' % author +copyright = u'2020, %s' % author github_doc_root = 'https://github.com/dmlc/xgboost/tree/master/doc/' os.environ['XGBOOST_BUILD_DOC'] = '1' # Version information. -import xgboost +import xgboost # NOQA version = xgboost.__version__ release = xgboost.__version__ @@ -99,7 +113,7 @@ source_parsers = { source_suffix = ['.rst', '.md'] # The encoding of source files. -#source_encoding = 'utf-8-sig' +# source_encoding = 'utf-8-sig' # The master toctree document. master_doc = 'index' @@ -115,9 +129,9 @@ autoclass_content = 'both' # There are two options for replacing |today|: either, you set today to some # non-false value, then it is used: -#today = '' +# today = '' # Else, today_fmt is used as the format for a strftime call. -#today_fmt = '%B %d, %Y' +# today_fmt = '%B %d, %Y' # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. @@ -126,27 +140,27 @@ html_extra_path = ['./tmp'] # The reST default role (used for this markup: `text`) to use for all # documents. 
-#default_role = None +# default_role = None # If true, '()' will be appended to :func: etc. cross-reference text. -#add_function_parentheses = True +# add_function_parentheses = True # If true, the current module name will be prepended to all description # unit titles (such as .. function::). -#add_module_names = True +# add_module_names = True # If true, sectionauthor and moduleauthor directives will be shown in the # output. They are ignored by default. -#show_authors = False +# show_authors = False # The name of the Pygments (syntax highlighting) style to use. pygments_style = 'sphinx' # A list of ignored prefixes for module index sorting. -#modindex_common_prefix = [] +# modindex_common_prefix = [] # If true, keep warnings as "system message" paragraphs in the built documents. -#keep_warnings = False +# keep_warnings = False # If true, `todo` and `todoList` produce output, else they produce nothing. todo_include_todos = False @@ -191,27 +205,32 @@ latex_documents = [ author, 'manual'), ] -intersphinx_mapping = {'python': ('https://docs.python.org/3.6', None), - 'numpy': ('http://docs.scipy.org/doc/numpy/', None), - 'scipy': ('http://docs.scipy.org/doc/scipy/reference/', None), - 'pandas': ('http://pandas-docs.github.io/pandas-docs-travis/', None), - 'sklearn': ('http://scikit-learn.org/stable', None)} +intersphinx_mapping = { + 'python': ('https://docs.python.org/3.6', None), + 'numpy': ('http://docs.scipy.org/doc/numpy/', None), + 'scipy': ('http://docs.scipy.org/doc/scipy/reference/', None), + 'pandas': ('http://pandas-docs.github.io/pandas-docs-travis/', None), + 'sklearn': ('http://scikit-learn.org/stable', None) +} + # hook for doxygen def run_doxygen(folder): - """Run the doxygen make command in the designated folder.""" - try: - retcode = subprocess.call("cd %s; make doxygen" % folder, shell=True) - if retcode < 0: - sys.stderr.write("doxygen terminated by signal %s" % (-retcode)) - except OSError as e: - sys.stderr.write("doxygen execution failed: %s" 
% e) + """Run the doxygen make command in the designated folder.""" + try: + retcode = subprocess.call("cd %s; make doxygen" % folder, shell=True) + if retcode < 0: + sys.stderr.write("doxygen terminated by signal %s" % (-retcode)) + except OSError as e: + sys.stderr.write("doxygen execution failed: %s" % e) + def generate_doxygen_xml(app): - """Run the doxygen make commands if we're on the ReadTheDocs server""" - read_the_docs_build = os.environ.get('READTHEDOCS', None) == 'True' - if read_the_docs_build: - run_doxygen('..') + """Run the doxygen make commands if we're on the ReadTheDocs server""" + read_the_docs_build = os.environ.get('READTHEDOCS', None) == 'True' + if read_the_docs_build: + run_doxygen('..') + def setup(app): - app.add_stylesheet('custom.css') + app.add_stylesheet('custom.css') diff --git a/python-package/MANIFEST.in b/python-package/MANIFEST.in index 38b1a77a4..1bf9ad3f9 100644 --- a/python-package/MANIFEST.in +++ b/python-package/MANIFEST.in @@ -1,16 +1,11 @@ -include *.md *.rst +include *.rst +include xgboost/VERSION +include xgboost/CMakeLists.txt + recursive-include xgboost * recursive-include xgboost/include * recursive-include xgboost/src * -recursive-include xgboost/make * recursive-include xgboost/rabit * -recursive-include xgboost/lib * recursive-include xgboost/dmlc-core * -#exclude pre-compiled .o and .a file for less confusions -#make sure .a files are all removed for forcing compiling -#include the pre-compiled .so is needed as a placeholder -#since it will be copy after compiling on the fly -global-exclude *.o -global-exclude *.a -global-exclude *.pyo -global-exclude *.pyc + +global-exclude *.py[oc] \ No newline at end of file diff --git a/python-package/prep_pip.sh b/python-package/prep_pip.sh deleted file mode 100644 index b5b98e00a..000000000 --- a/python-package/prep_pip.sh +++ /dev/null @@ -1,11 +0,0 @@ -# this script is for preparation for PyPI installation package, -# please don't use it for installing xgboost from github - 
-# after executing `make pippack`, cd xgboost-python, -#run this script and get the sdist tar.gz in ./dist/ -sh ./xgboost/build-python.sh -cp setup_pip.py setup.py -python setup.py sdist - -#make sure you know what you gonna do, and uncomment the following line -#python setup.py register upload diff --git a/python-package/setup.py b/python-package/setup.py index 220c238c7..bbe1fce77 100644 --- a/python-package/setup.py +++ b/python-package/setup.py @@ -1,68 +1,313 @@ -# pylint: disable=invalid-name, exec-used """Setup xgboost package.""" -from __future__ import absolute_import -import io -import sys import os -from setuptools import setup, find_packages +import shutil +import subprocess +import logging +import distutils +import sys +from platform import system +from setuptools import setup, find_packages, Extension +from setuptools.command import build_ext, sdist, install_lib, install -# import subprocess -sys.path.insert(0, '.') +# You can't use `pip install .` as pip copies setup.py to a temporary +# directory, parent directory is no longer reachable (isolated build) . +CURRENT_DIR = os.path.abspath(os.path.dirname(__file__)) +sys.path.insert(0, CURRENT_DIR) -CURRENT_DIR = os.path.dirname(__file__) +USER_OPTIONS = { + 'use-openmp': (None, 'Build with OpenMP support.', 1), + 'use-cuda': (None, 'Build with GPU acceleration.', 0), + 'use-nccl': (None, 'Build with NCCL to enable distributed GPU support.', 0), + 'build-with-shared-nccl': (None, 'Build with shared NCCL library.', 0), + 'use-hdfs': (None, 'Build with HDFS support', 0), + 'use-azure': (None, 'Build with AZURE support.', 0), + 'use-s3': (None, 'Build with S3 support', 0), + 'plugin-lz4': (None, 'Build lz4 plugin.', 0), + 'plugin-dense-parser': (None, 'Build dense parser plugin.', 0) +} -# We can not import `xgboost.libpath` in setup.py directly since xgboost/__init__.py -# import `xgboost.core` and finally will import `numpy` and `scipy` which are setup -# `install_requires`. 
That's why we're using `exec` here. -libpath_py = os.path.join(CURRENT_DIR, 'xgboost/libpath.py') -libpath = {'__file__': libpath_py} -exec(compile(open(libpath_py, "rb").read(), libpath_py, 'exec'), libpath, libpath) +NEED_CLEAN_TREE = set() +NEED_CLEAN_FILE = set() +BUILD_TEMP_DIR = None -LIB_PATH = [] -for libfile in libpath['find_lib_path'](): - try: - relpath = os.path.relpath(libfile, CURRENT_DIR) - LIB_PATH.append(relpath) - break # need only one - except ValueError: - continue -print("Install libxgboost from: %s" % LIB_PATH) +def lib_name(): + '''Return platform dependent shared object name.''' + if system() == 'Linux' or system().upper().endswith('BSD'): + name = 'libxgboost.so' + elif system() == 'Darwin': + name = 'libxgboost.dylib' + elif system() == 'Windows': + name = 'xgboost.dll' + return name -# Please use setup_pip.py for generating and deploying pip installation -# detailed instruction in setup_pip.py -setup(name='xgboost', - version=open(os.path.join(CURRENT_DIR, 'xgboost/VERSION')).read().strip(), - description="XGBoost Python Package", - long_description=io.open(os.path.join(CURRENT_DIR, 'README.rst'), encoding='utf-8').read(), - install_requires=[ - 'numpy', - 'scipy', - ], - extras_require={ - 'pandas': ['pandas'], - 'sklearn': ['sklearn'], - 'dask': ['dask', 'pandas', 'distributed'], - 'datatable': ['datatable'], - 'plotting': ['graphviz', 'matplotlib'] - }, - maintainer='Hyunsu Cho', - maintainer_email='chohyu01@cs.washington.edu', - zip_safe=False, - packages=find_packages(), - # this will use MANIFEST.in during install where we specify additional files, - # this is the golden line - include_package_data=True, - data_files=[('xgboost', LIB_PATH)], - license='Apache-2.0', - classifiers=['License :: OSI Approved :: Apache Software License', - 'Development Status :: 5 - Production/Stable', - 'Operating System :: OS Independent', - 'Programming Language :: Python', - 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 
3.5', - 'Programming Language :: Python :: 3.6', - 'Programming Language :: Python :: 3.7', - 'Programming Language :: Python :: 3.8'], - python_requires='>=3.5', - url='https://github.com/dmlc/xgboost') + +def copy_tree(src_dir, target_dir): + '''Copy source tree into build directory.''' + def clean_copy_tree(src, dst): + distutils.dir_util.copy_tree(src, dst) + NEED_CLEAN_TREE.add(os.path.abspath(dst)) + + def clean_copy_file(src, dst): + distutils.file_util.copy_file(src, dst) + NEED_CLEAN_FILE.add(os.path.abspath(dst)) + + src = os.path.join(src_dir, 'src') + inc = os.path.join(src_dir, 'include') + dmlc_core = os.path.join(src_dir, 'dmlc-core') + rabit = os.path.join(src_dir, 'rabit') + cmake = os.path.join(src_dir, 'cmake') + plugin = os.path.join(src_dir, 'plugin') + + clean_copy_tree(src, os.path.join(target_dir, 'src')) + clean_copy_tree(inc, os.path.join(target_dir, 'include')) + clean_copy_tree(dmlc_core, os.path.join(target_dir, 'dmlc-core')) + clean_copy_tree(rabit, os.path.join(target_dir, 'rabit')) + clean_copy_tree(cmake, os.path.join(target_dir, 'cmake')) + clean_copy_tree(plugin, os.path.join(target_dir, 'plugin')) + + cmake_list = os.path.join(src_dir, 'CMakeLists.txt') + clean_copy_file(cmake_list, os.path.join(target_dir, 'CMakeLists.txt')) + lic = os.path.join(src_dir, 'LICENSE') + clean_copy_file(lic, os.path.join(target_dir, 'LICENSE')) + + +def clean_up(): + '''Removed copied files.''' + for path in NEED_CLEAN_TREE: + shutil.rmtree(path) + for path in NEED_CLEAN_FILE: + os.remove(path) + + +class CMakeExtension(Extension): # pylint: disable=too-few-public-methods + '''Wrapper for extension''' + def __init__(self, name): + super().__init__(name=name, sources=[]) + + +class BuildExt(build_ext.build_ext): + '''Custom build_ext command using CMake.''' + + logger = logging.getLogger('XGBoost build_ext') + + # pylint: disable=too-many-arguments,no-self-use + def build(self, src_dir, build_dir, generator, build_tool=None, use_omp=1): + '''Build 
the core library with CMake.''' + cmake_cmd = ['cmake', src_dir, generator] + for k, v in USER_OPTIONS.items(): + arg = k.replace('-', '_').upper() + value = str(v[2]) + cmake_cmd.append('-D' + arg + '=' + value) + if k == 'USE_OPENMP' and use_omp == 0: + continue + + if system() == 'Windows': + cmake_cmd.append('--build') + + subprocess.check_call(cmake_cmd, cwd=build_dir) + + if system() != 'Windows': + nproc = os.cpu_count() + subprocess.check_call([build_tool, '-j' + str(nproc)], + cwd=build_dir) + + def build_cmake_extension(self): + '''Configure and build using CMake''' + src_dir = 'xgboost' + try: + copy_tree(os.path.join(CURRENT_DIR, os.path.pardir), + os.path.join(self.build_temp, src_dir)) + except Exception: # pylint: disable=broad-except + copy_tree(src_dir, os.path.join(self.build_temp, src_dir)) + build_dir = self.build_temp + global BUILD_TEMP_DIR # pylint: disable=global-statement + BUILD_TEMP_DIR = build_dir + libxgboost = os.path.abspath( + os.path.join(CURRENT_DIR, os.path.pardir, 'lib', lib_name())) + + if os.path.exists(libxgboost): + self.logger.info('Found shared library, skipping build.') + return + + self.logger.info('Building from source. %s', libxgboost) + if not os.path.exists(build_dir): + os.mkdir(build_dir) + if shutil.which('ninja'): + build_tool = 'ninja' + else: + build_tool = 'make' + + if system() == 'Windows': + # Pick up from LGB, just test every possible tool chain. 
+ for vs in ('-GVisual Studio 16 2019', '-GVisual Studio 15 2017', + '-GVisual Studio 14 2015', '-GMinGW Makefiles'): + try: + self.build(src_dir, build_dir, vs) + self.logger.info( + '%s is used for building Windows distribution.', vs) + break + except subprocess.CalledProcessError: + continue + else: + gen = '-GNinja' if build_tool == 'ninja' else '-GUnix Makefiles' + try: + self.build(src_dir, build_dir, gen, build_tool, use_omp=1) + except subprocess.CalledProcessError: + self.logger.warning('Disabling OpenMP support.') + self.build(src_dir, build_dir, gen, build_tool, use_omp=0) + + def build_extension(self, ext): + '''Override the method for dispatching.''' + if isinstance(ext, CMakeExtension): + self.build_cmake_extension() + else: + super().build_extension(ext) + + def copy_extensions_to_source(self): + '''Dummy override. Invoked during editable installation. Our binary + should available in `lib`. + + ''' + if not os.path.exists( + os.path.join(CURRENT_DIR, os.path.pardir, 'lib', lib_name())): + raise ValueError('For using editable installation, please ' + + 'build the shared object first with CMake.') + + +class Sdist(sdist.sdist): # pylint: disable=too-many-ancestors + '''Copy c++ source into Python directory.''' + logger = logging.getLogger('xgboost sdist') + + def run(self): + copy_tree(os.path.join(CURRENT_DIR, os.path.pardir), + os.path.join(CURRENT_DIR, 'xgboost')) + libxgboost = os.path.join( + CURRENT_DIR, os.path.pardir, 'lib', lib_name()) + if os.path.exists(libxgboost): + self.logger.warning( + 'Found shared library, removing to avoid being included in source distribution.' 
+ ) + os.remove(libxgboost) + super().run() + + +class InstallLib(install_lib.install_lib): + '''Copy shared object into installation directory.''' + logger = logging.getLogger('xgboost install_lib') + + def install(self): + outfiles = super().install() + lib_dir = os.path.join(self.install_dir, 'xgboost', 'lib') + if not os.path.exists(lib_dir): + os.mkdir(lib_dir) + dst = os.path.join(self.install_dir, 'xgboost', 'lib', lib_name()) + + global BUILD_TEMP_DIR # pylint: disable=global-statement + libxgboost_path = lib_name() + + dft_lib_dir = os.path.join(CURRENT_DIR, os.path.pardir, 'lib') + build_dir = os.path.join(BUILD_TEMP_DIR, 'xgboost', 'lib') + + if os.path.exists(os.path.join(dft_lib_dir, libxgboost_path)): + # The library is built by CMake directly + src = os.path.join(dft_lib_dir, libxgboost_path) + else: + # The library is built by setup.py + src = os.path.join(build_dir, libxgboost_path) + self.logger.info('Installing shared library: %s', src) + dst, _ = self.copy_file(src, dst) + outfiles.append(dst) + return outfiles + + +class Install(install.install): # pylint: disable=too-many-instance-attributes + '''An interface to install command, accepting XGBoost specific + arguments. 
+ + ''' + user_options = install.install.user_options + list( + (k, v[0], v[1]) for k, v in USER_OPTIONS.items()) + + def initialize_options(self): + super().initialize_options() + self.use_openmp = 1 + self.use_cuda = 0 + self.use_nccl = 0 + self.build_with_shared_nccl = 0 + + self.use_hdfs = 0 + self.use_azure = 0 + self.use_s3 = 0 + + self.plugin_lz4 = 0 + self.plugin_dense_parser = 0 + + def run(self): + for k, v in USER_OPTIONS.items(): + arg = k.replace('-', '_') + if hasattr(self, arg): + USER_OPTIONS[k] = (v[0], v[1], getattr(self, arg)) + super().run() + + +if __name__ == '__main__': + # Supported commands: + # From internet: + # - pip install xgboost + # - pip install --no-binary :all: xgboost + + # From source tree `xgboost/python-package`: + # - python setup.py build + # - python setup.py build_ext + # - python setup.py install + # - python setup.py sdist && pip install + # - python setup.py bdist_wheel && pip install + + # When XGBoost is compiled directly with CMake: + # - pip install . 
-e + # - python setup.py develop # same as above + logging.basicConfig(level=logging.INFO) + setup(name='xgboost', + version=open(os.path.join( + CURRENT_DIR, 'xgboost/VERSION')).read().strip(), + description="XGBoost Python Package", + long_description=open(os.path.join(CURRENT_DIR, 'README.rst'), + encoding='utf-8').read(), + install_requires=[ + 'numpy', + 'scipy', + ], + ext_modules=[CMakeExtension('libxgboost')], + cmdclass={ + 'build_ext': BuildExt, + 'sdist': Sdist, + 'install_lib': InstallLib, + 'install': Install + }, + extras_require={ + 'pandas': ['pandas'], + 'sklearn': ['sklearn'], + 'dask': ['dask', 'pandas', 'distributed'], + 'datatable': ['datatable'], + 'plotting': ['graphviz', 'matplotlib'] + }, + maintainer='Hyunsu Cho', + maintainer_email='chohyu01@cs.washington.edu', + zip_safe=False, + packages=find_packages(), + include_package_data=True, + license='Apache-2.0', + classifiers=['License :: OSI Approved :: Apache Software License', + 'Development Status :: 5 - Production/Stable', + 'Operating System :: OS Independent', + 'Programming Language :: Python', + 'Programming Language :: Python :: 3', + 'Programming Language :: Python :: 3.5', + 'Programming Language :: Python :: 3.6', + 'Programming Language :: Python :: 3.7', + 'Programming Language :: Python :: 3.8'], + python_requires='>=3.5', + url='https://github.com/dmlc/xgboost') + + clean_up() diff --git a/python-package/setup_pip.py b/python-package/setup_pip.py deleted file mode 100644 index 6f584eefc..000000000 --- a/python-package/setup_pip.py +++ /dev/null @@ -1,85 +0,0 @@ -# pylint: disable=invalid-name, exec-used, no-self-use, missing-docstring -"""Setup xgboost package.""" -from __future__ import absolute_import -import sys -import os -from setuptools import setup, find_packages, Distribution -# import subprocess -sys.path.insert(0, '.') - -# this script is for packing and shipping pip installation -# it builds xgboost code on the fly and packs for pip -# please don't use this file 
for installing from github - -if os.name != 'nt': # if not windows, compile and install - # if not windows, compile and install - if len(sys.argv) < 2 or sys.argv[1] != 'sdist': - # do not build for sdist - os.system('sh ./xgboost/build-python.sh') -else: - print('Windows users please use github installation.') - sys.exit() - -CURRENT_DIR = os.path.dirname(__file__) - - -class BinaryDistribution(Distribution): - """Auxilliary class necessary to inform setuptools that this is a - non-generic, platform-specific package.""" - def has_ext_modules(self): - return True - - -# We can not import `xgboost.libpath` in setup.py directly since xgboost/__init__.py -# import `xgboost.core` and finally will import `numpy` and `scipy` which are setup -# `install_requires`. That's why we're using `exec` here. -# do not import libpath for sdist -if len(sys.argv) < 2 or sys.argv[1] != 'sdist': - libpath_py = os.path.join(CURRENT_DIR, 'xgboost/libpath.py') - libpath = {'__file__': libpath_py} - exec(compile(open(libpath_py, "rb").read(), libpath_py, 'exec'), libpath, libpath) - - LIB_PATH = libpath['find_lib_path']() - -setup(name='xgboost', - version=open(os.path.join(CURRENT_DIR, 'xgboost/VERSION')).read().strip(), - description='XGBoost Python Package', - install_requires=[ - 'numpy', - 'scipy', - ], - extras_require={ - 'pandas': ['pandas'], - 'sklearn': ['sklearn'], - 'dask': ['dask', 'pandas', 'distributed'], - 'datatable': ['datatable'], - 'plotting': ['graphviz', 'matplotlib'] - }, - maintainer='Hyunsu Cho', - maintainer_email='chohyu01@cs.washington.edu', - zip_safe=False, - packages=find_packages(), - # don't need this and don't use this, give everything to MANIFEST.in - # package_dir = {'':'xgboost'}, - # package_data = {'': ['*.txt','*.md','*.sh'], - # } - # this will use MANIFEST.in during install where we specify additional files, - # this is the golden line - include_package_data=True, - # !!! 
don't use data_files for creating pip installation, - # otherwise install_data process will copy it to - # root directory for some machines, and cause confusions on building - # data_files=[('xgboost', LIB_PATH)], - distclass=BinaryDistribution, - license='Apache-2.0', - classifiers=['License :: OSI Approved :: Apache Software License', - 'Development Status :: 5 - Production/Stable', - 'Operating System :: OS Independent', - 'Programming Language :: Python', - 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.5', - 'Programming Language :: Python :: 3.6', - 'Programming Language :: Python :: 3.7', - 'Programming Language :: Python :: 3.8'], - python_requires='>=3.5', - url='https://github.com/dmlc/xgboost') diff --git a/python-package/xgboost/build-python.sh b/python-package/xgboost/build-python.sh deleted file mode 100755 index 7d70df4dc..000000000 --- a/python-package/xgboost/build-python.sh +++ /dev/null @@ -1,34 +0,0 @@ -#!/bin/sh -# This is a simple script to make xgboost in MAC and Linux for python wrapper only -# Basically, it first try to make with OpenMP, if fails, disable OpenMP and make it again. -# This will automatically make xgboost for MAC users who don't have OpenMP support. -# In most cases, type make will give what you want. - -# See additional instruction in doc/build.md - -# note: this script is build for python package only, and it might have some filename -# conflict with build.sh which is for everything. - -set -e -set -x - -oldpath=`pwd` -cd ./xgboost/ - -#remove the pre-compiled .so and trigger the system's on-the-fly compiling -mkdir -p build -cd build -if cmake .. && make -j4; then - echo "Successfully built multi-thread xgboost" -else - echo "-----------------------------" - echo "Building multi-thread xgboost failed" - echo "Start to build single-thread xgboost" - cmake .. -DUSE_OPENMP=0 - make -j4 - echo "Successfully built single-thread xgboost; training speed may be suboptimal." 
- echo "To use all CPU cores for training jobs, install libomp package from Homebrew and re-install XGBoost" -fi -cd $oldpath - -set +x diff --git a/python-package/xgboost/dmlc-core b/python-package/xgboost/dmlc-core deleted file mode 120000 index 570e8ecf6..000000000 --- a/python-package/xgboost/dmlc-core +++ /dev/null @@ -1 +0,0 @@ -../../dmlc-core \ No newline at end of file diff --git a/python-package/xgboost/include b/python-package/xgboost/include deleted file mode 120000 index fcffffbed..000000000 --- a/python-package/xgboost/include +++ /dev/null @@ -1 +0,0 @@ -../../include \ No newline at end of file diff --git a/python-package/xgboost/lib b/python-package/xgboost/lib deleted file mode 120000 index 58677ddb4..000000000 --- a/python-package/xgboost/lib +++ /dev/null @@ -1 +0,0 @@ -../../lib \ No newline at end of file diff --git a/python-package/xgboost/libpath.py b/python-package/xgboost/libpath.py index a313a1b87..65f7403e6 100644 --- a/python-package/xgboost/libpath.py +++ b/python-package/xgboost/libpath.py @@ -19,21 +19,27 @@ def find_lib_path(): List of all found library path to xgboost """ curr_path = os.path.dirname(os.path.abspath(os.path.expanduser(__file__))) - # make pythonpack hack: copy this directory one level upper for setup.py - dll_path = [curr_path, os.path.join(curr_path, '../../lib/'), - os.path.join(curr_path, './lib/'), - os.path.join(sys.prefix, 'xgboost')] + dll_path = [ + # normal, after installation `lib` is copied into Python package tree. + os.path.join(curr_path, 'lib'), + # editable installation, no copying is performed. 
+ os.path.join(curr_path, os.path.pardir, os.path.pardir, 'lib'), + ] if sys.platform == 'win32': if platform.architecture()[0] == '64bit': - dll_path.append(os.path.join(curr_path, '../../windows/x64/Release/')) - # hack for pip installation when copy all parent source directory here + dll_path.append( + os.path.join(curr_path, '../../windows/x64/Release/')) + # hack for pip installation when copy all parent source + # directory here dll_path.append(os.path.join(curr_path, './windows/x64/Release/')) else: dll_path.append(os.path.join(curr_path, '../../windows/Release/')) - # hack for pip installation when copy all parent source directory here + # hack for pip installation when copy all parent source + # directory here dll_path.append(os.path.join(curr_path, './windows/Release/')) dll_path = [os.path.join(p, 'xgboost.dll') for p in dll_path] - elif sys.platform.startswith('linux') or sys.platform.startswith('freebsd'): + elif sys.platform.startswith('linux') or sys.platform.startswith( + 'freebsd'): dll_path = [os.path.join(p, 'libxgboost.so') for p in dll_path] elif sys.platform == 'darwin': dll_path = [os.path.join(p, 'libxgboost.dylib') for p in dll_path] @@ -42,10 +48,13 @@ def find_lib_path(): lib_path = [p for p in dll_path if os.path.exists(p) and os.path.isfile(p)] - # From github issues, most of installation errors come from machines w/o compilers + # XGBOOST_BUILD_DOC is defined by sphinx conf. if not lib_path and not os.environ.get('XGBOOST_BUILD_DOC', False): - raise XGBoostLibraryNotFound( - 'Cannot find XGBoost Library in the candidate path, ' + - 'did you install compilers and run build.sh in root path?\n' - 'List of candidates:\n' + ('\n'.join(dll_path))) + link = 'https://xgboost.readthedocs.io/en/latest/build.html' + msg = 'Cannot find XGBoost Library in the candidate path. 
' + \ + 'List of candidates:\n- ' + ('\n- '.join(dll_path)) + \ + '\nXGBoost Python package path: ' + curr_path + \ + '\nsys.prefix: ' + sys.prefix + \ + '\nSee: ' + link + ' for installing XGBoost.' + raise XGBoostLibraryNotFound(msg) return lib_path diff --git a/python-package/xgboost/make b/python-package/xgboost/make deleted file mode 120000 index 67d4229d2..000000000 --- a/python-package/xgboost/make +++ /dev/null @@ -1 +0,0 @@ -../../make \ No newline at end of file diff --git a/python-package/xgboost/rabit b/python-package/xgboost/rabit deleted file mode 120000 index 2dbb88037..000000000 --- a/python-package/xgboost/rabit +++ /dev/null @@ -1 +0,0 @@ -../../rabit \ No newline at end of file diff --git a/python-package/xgboost/src b/python-package/xgboost/src deleted file mode 120000 index 929cb3dc9..000000000 --- a/python-package/xgboost/src +++ /dev/null @@ -1 +0,0 @@ -../../src \ No newline at end of file diff --git a/tests/travis/run_test.sh b/tests/travis/run_test.sh index 05632b7d3..4a01455d8 100755 --- a/tests/travis/run_test.sh +++ b/tests/travis/run_test.sh @@ -6,12 +6,12 @@ source $HOME/miniconda/bin/activate if [ ${TASK} == "python_sdist_test" ]; then set -e - make pippack conda activate python3 python --version conda install numpy scipy + make pippack python -m pip install xgboost-*.tar.gz -v --user python -c 'import xgboost' || exit -1 fi