enable basic sphinx doc
This commit is contained in:
parent
8083c30e7b
commit
c43fee541d
@ -13,7 +13,7 @@ Contents
|
|||||||
--------
|
--------
|
||||||
* [What's New](#whats-new)
|
* [What's New](#whats-new)
|
||||||
* [Version](#version)
|
* [Version](#version)
|
||||||
* [Documentation](doc/README.md)
|
* [Documentation](doc/index.md)
|
||||||
* [Build Instruction](doc/build.md)
|
* [Build Instruction](doc/build.md)
|
||||||
* [Features](#features)
|
* [Features](#features)
|
||||||
* [Distributed XGBoost](multi-node)
|
* [Distributed XGBoost](multi-node)
|
||||||
@ -43,7 +43,6 @@ Version
|
|||||||
|
|
||||||
Features
|
Features
|
||||||
--------
|
--------
|
||||||
|
|
||||||
* Easily accessible through CLI, [python](https://github.com/dmlc/xgboost/blob/master/demo/guide-python/basic_walkthrough.py),
|
* Easily accessible through CLI, [python](https://github.com/dmlc/xgboost/blob/master/demo/guide-python/basic_walkthrough.py),
|
||||||
[R](https://github.com/dmlc/xgboost/blob/master/R-package/demo/basic_walkthrough.R),
|
[R](https://github.com/dmlc/xgboost/blob/master/R-package/demo/basic_walkthrough.R),
|
||||||
[Julia](https://github.com/antinucleon/XGBoost.jl/blob/master/demo/basic_walkthrough.jl)
|
[Julia](https://github.com/antinucleon/XGBoost.jl/blob/master/demo/basic_walkthrough.jl)
|
||||||
|
|||||||
7
doc/.gitignore
vendored
Normal file
7
doc/.gitignore
vendored
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
html
|
||||||
|
latex
|
||||||
|
*.sh
|
||||||
|
_*
|
||||||
|
doxygen
|
||||||
|
parser.py
|
||||||
|
*.pyc
|
||||||
192
doc/Makefile
Normal file
192
doc/Makefile
Normal file
@ -0,0 +1,192 @@
|
|||||||
|
# Makefile for Sphinx documentation
#
# Every target below is a command rather than a file: each one runs
# $(SPHINXBUILD) with a different builder (-b) and writes its output to a
# sub-directory of $(BUILDDIR).  `help` is the first target and therefore
# the default goal.

# You can set these variables from the command line.
SPHINXOPTS =
SPHINXBUILD = sphinx-build
PAPER =
BUILDDIR = _build

# User-friendly check for sphinx-build
ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1)
$(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/)
endif

# Internal variables.
# These stay recursively expanded (=) so that PAPER, SPHINXOPTS and BUILDDIR
# are looked up when a recipe runs.
PAPEROPT_a4 = -D latex_paper_size=a4
PAPEROPT_letter = -D latex_paper_size=letter
ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
# the i18n builder cannot share the environment and doctrees with the others
I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .

# All targets defined in this file are listed here so that a stray file or
# directory with the same name (e.g. `html`, `info`, `xml`) cannot make the
# target look up to date.
.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp applehelp devhelp epub latex latexpdf latexpdfja text man texinfo info changes linkcheck doctest coverage gettext xml pseudoxml

help:
	@echo "Please use \`make <target>' where <target> is one of"
	@echo " html to make standalone HTML files"
	@echo " dirhtml to make HTML files named index.html in directories"
	@echo " singlehtml to make a single large HTML file"
	@echo " pickle to make pickle files"
	@echo " json to make JSON files"
	@echo " htmlhelp to make HTML files and a HTML help project"
	@echo " qthelp to make HTML files and a qthelp project"
	@echo " applehelp to make an Apple Help Book"
	@echo " devhelp to make HTML files and a Devhelp project"
	@echo " epub to make an epub"
	@echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
	@echo " latexpdf to make LaTeX files and run them through pdflatex"
	@echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx"
	@echo " text to make text files"
	@echo " man to make manual pages"
	@echo " texinfo to make Texinfo files"
	@echo " info to make Texinfo files and run them through makeinfo"
	@echo " gettext to make PO message catalogs"
	@echo " changes to make an overview of all changed/added/deprecated items"
	@echo " xml to make Docutils-native XML files"
	@echo " pseudoxml to make pseudoxml-XML files for display purposes"
	@echo " linkcheck to check all external links for integrity"
	@echo " doctest to run all doctests embedded in the documentation (if enabled)"
	@echo " coverage to run coverage check of the documentation (if enabled)"

# Guard first: with BUILDDIR overridden to an empty value the rm below would
# expand to `rm -rf /*`.
clean:
	@$(if $(strip $(BUILDDIR)),,$(error BUILDDIR is empty; refusing to run rm -rf on the filesystem root))
	rm -rf $(BUILDDIR)/*

html:
	$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
	@echo
	@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."

dirhtml:
	$(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
	@echo
	@echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."

singlehtml:
	$(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
	@echo
	@echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."

pickle:
	$(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
	@echo
	@echo "Build finished; now you can process the pickle files."

json:
	$(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
	@echo
	@echo "Build finished; now you can process the JSON files."

htmlhelp:
	$(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
	@echo
	@echo "Build finished; now you can run HTML Help Workshop with the" \
	      ".hhp project file in $(BUILDDIR)/htmlhelp."

# The .qhcp/.qhc names follow the Sphinx project name (xgboost in conf.py).
qthelp:
	$(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
	@echo
	@echo "Build finished; now you can run "qcollectiongenerator" with the" \
	      ".qhcp project file in $(BUILDDIR)/qthelp, like this:"
	@echo "# qcollectiongenerator $(BUILDDIR)/qthelp/xgboost.qhcp"
	@echo "To view the help file:"
	@echo "# assistant -collectionFile $(BUILDDIR)/qthelp/xgboost.qhc"

applehelp:
	$(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp
	@echo
	@echo "Build finished. The help book is in $(BUILDDIR)/applehelp."
	@echo "N.B. You won't be able to view it unless you put it in" \
	      "~/Library/Documentation/Help or install it in your application" \
	      "bundle."

# $$HOME is escaped so the shell, not make, expands it.
devhelp:
	$(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
	@echo
	@echo "Build finished."
	@echo "To view the help file:"
	@echo "# mkdir -p $$HOME/.local/share/devhelp/xgboost"
	@echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/xgboost"
	@echo "# devhelp"

epub:
	$(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub
	@echo
	@echo "Build finished. The epub file is in $(BUILDDIR)/epub."

latex:
	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
	@echo
	@echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
	@echo "Run \`make' in that directory to run these through (pdf)latex" \
	      "(use \`make latexpdf' here to do that automatically)."

latexpdf:
	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
	@echo "Running LaTeX files through pdflatex..."
	$(MAKE) -C $(BUILDDIR)/latex all-pdf
	@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."

latexpdfja:
	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
	@echo "Running LaTeX files through platex and dvipdfmx..."
	$(MAKE) -C $(BUILDDIR)/latex all-pdf-ja
	@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."

text:
	$(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
	@echo
	@echo "Build finished. The text files are in $(BUILDDIR)/text."

man:
	$(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
	@echo
	@echo "Build finished. The manual pages are in $(BUILDDIR)/man."

texinfo:
	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
	@echo
	@echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
	@echo "Run \`make' in that directory to run these through makeinfo" \
	      "(use \`make info' here to do that automatically)."

# Recurse with $(MAKE), like latexpdf does, so -j/-n and command-line
# variables reach the sub-make.
info:
	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
	@echo "Running Texinfo files through makeinfo..."
	$(MAKE) -C $(BUILDDIR)/texinfo info
	@echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."

gettext:
	$(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale
	@echo
	@echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."

changes:
	$(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
	@echo
	@echo "The overview file is in $(BUILDDIR)/changes."

linkcheck:
	$(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
	@echo
	@echo "Link check complete; look for any errors in the above output " \
	      "or in $(BUILDDIR)/linkcheck/output.txt."

doctest:
	$(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
	@echo "Testing of doctests in the sources finished, look at the " \
	      "results in $(BUILDDIR)/doctest/output.txt."

coverage:
	$(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage
	@echo "Testing of coverage in the sources finished, look at the " \
	      "results in $(BUILDDIR)/coverage/python.txt."

xml:
	$(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml
	@echo
	@echo "Build finished. The XML files are in $(BUILDDIR)/xml."

pseudoxml:
	$(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml
	@echo
	@echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml."
|
||||||
158
doc/conf.py
Normal file
158
doc/conf.py
Normal file
@ -0,0 +1,158 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
#
|
||||||
|
# documentation build configuration file, created by
|
||||||
|
# sphinx-quickstart on Thu Jul 23 19:40:08 2015.
|
||||||
|
#
|
||||||
|
# This file is execfile()d with the current directory set to its
|
||||||
|
# containing dir.
|
||||||
|
#
|
||||||
|
# Note that not all possible configuration values are present in this
|
||||||
|
# autogenerated file.
|
||||||
|
#
|
||||||
|
# All configuration values have a default; values that are commented out
|
||||||
|
# serve to show the default.
|
||||||
|
import sys
|
||||||
|
import os, subprocess
|
||||||
|
import shlex
|
||||||
|
# If extensions (or modules to document with autodoc) are in another directory,
|
||||||
|
# add these directories to sys.path here. If the directory is relative to the
|
||||||
|
# documentation root, use os.path.abspath to make it absolute, like shown here.
|
||||||
|
curr_path = os.path.dirname(os.path.abspath(os.path.expanduser(__file__)))
|
||||||
|
libpath = os.path.join(curr_path, '../python-package/')
|
||||||
|
sys.path.insert(0, libpath)
|
||||||
|
sys.path.insert(0, curr_path)
|
||||||
|
|
||||||
|
from sphinx_util import MarkdownParser
|
||||||
|
|
||||||
|
# -- General configuration ------------------------------------------------

# General information about the project.
project = u'xgboost'
author = u'%s developers' % project
copyright = u'2015, %s' % author
# Base URL handed to MarkdownParser below for rewriting links.
github_doc_root = 'https://github.com/dmlc/xgboost/tree/master/doc/'

# add markdown parser
MarkdownParser.github_doc_root = github_doc_root
# Files ending in .md are parsed with MarkdownParser.
source_parsers = {
    '.md': MarkdownParser,
}
# Must be set before `import xgboost` below.
# NOTE(review): the package appears to skip its "library not found" error when
# this variable is set, so the docs can be built without the compiled
# library -- confirm in python-package/xgboost/core.py.
os.environ['XGBOOST_BUILD_DOC'] = '1'
# Version information.
import xgboost
version = xgboost.__version__
release = xgboost.__version__

# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom ones
extensions = [
    'sphinx.ext.autodoc',
    'sphinx.ext.napoleon',
    'sphinx.ext.mathjax',
]

# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']

# The suffix(es) of source filenames.
# You can specify multiple suffixes as a list of strings:
# source_suffix = ['.rst', '.md']
source_suffix = ['.rst', '.md']

# The encoding of source files.
#source_encoding = 'utf-8-sig'

# The master toctree document.
master_doc = 'index'

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
#
# This is also used if you do content translation via gettext catalogs.
# Usually you set "language" from the command line for these cases.
language = None

# There are two options for replacing |today|: either, you set today to some
# non-false value, then it is used:
#today = ''
# Else, today_fmt is used as the format for a strftime call.
#today_fmt = '%B %d, %Y'

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
exclude_patterns = ['_build']

# The reST default role (used for this markup: `text`) to use for all
# documents.
#default_role = None

# If true, '()' will be appended to :func: etc. cross-reference text.
#add_function_parentheses = True

# If true, the current module name will be prepended to all description
# unit titles (such as .. function::).
#add_module_names = True

# If true, sectionauthor and moduleauthor directives will be shown in the
# output. They are ignored by default.
#show_authors = False

# The name of the Pygments (syntax highlighting) style to use.
pygments_style = 'sphinx'

# A list of ignored prefixes for module index sorting.
#modindex_common_prefix = []

# If true, keep warnings as "system message" paragraphs in the built documents.
#keep_warnings = False

# If true, `todo` and `todoList` produce output, else they produce nothing.
todo_include_todos = False

# -- Options for HTML output ----------------------------------------------

# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
# html_theme = 'alabaster'

# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']

# Output file base name for HTML help builder.
htmlhelp_basename = project + 'doc'

# -- Options for LaTeX output ---------------------------------------------
# No LaTeX overrides are set.
latex_elements = {
}

# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title,
# author, documentclass [howto, manual, or own class]).
latex_documents = [
    (master_doc, '%s.tex' % project, project,
     author, 'manual'),
]
|
||||||
|
|
||||||
|
# hook for doxygen
|
||||||
|
def run_doxygen(folder):
    """Run ``make doxygen`` in ``folder`` and report any failure on stderr.

    Parameters
    ----------
    folder : str
        Directory in which ``make doxygen`` is executed.

    Returns
    -------
    None
        Failures are written to ``sys.stderr``; nothing is raised for a
        missing directory, a missing ``make`` executable or a failing build.
    """
    try:
        # Use cwd= rather than "cd folder; make ..." through a shell: with the
        # shell form a failed `cd` still ran make in the wrong directory, and
        # `folder` was interpolated into a shell command line unquoted.
        retcode = subprocess.call(['make', 'doxygen'], cwd=folder)
        if retcode < 0:
            # A negative return code means the child was killed by a signal.
            sys.stderr.write("doxygen terminated by signal %s\n" % (-retcode))
        elif retcode > 0:
            sys.stderr.write("doxygen exited with status %s\n" % retcode)
    except OSError as e:
        # Raised when `make` or `folder` does not exist.
        sys.stderr.write("doxygen execution failed: %s\n" % e)
|
||||||
|
|
||||||
|
def generate_doxygen_xml(app):
    """Build the doxygen output, but only on the ReadTheDocs server.

    ``app`` is accepted for the Sphinx event-handler signature and is not
    used.  Does nothing unless the ``READTHEDOCS`` environment variable is
    exactly ``'True'``; in that case doxygen is run from the parent directory.
    """
    if os.environ.get('READTHEDOCS', None) != 'True':
        return
    run_doxygen('..')
|
||||||
|
|
||||||
|
def setup(app):
    """Sphinx setup hook for this configuration; currently registers nothing.

    ``app`` is the object the doxygen hook would be connected to; while that
    connection is commented out it is unused.
    """
    # Add hook for building doxygen xml when needed
    # no c++ API for now
    # app.connect("builder-inited", generate_doxygen_xml)
    pass
|
||||||
@ -1,5 +1,5 @@
|
|||||||
Using XGBoost External Memory Version(beta)
|
Using XGBoost External Memory Version(beta)
|
||||||
====
|
===========================================
|
||||||
There is no big difference between using external memory version and in-memory version.
|
There is no big difference between using external memory version and in-memory version.
|
||||||
The only difference is the filename format.
|
The only difference is the filename format.
|
||||||
|
|
||||||
@ -19,13 +19,13 @@ You can find that there is additional ```#dtrain.cache``` following the libsvm f
|
|||||||
For CLI version, simply use ```"../data/agaricus.txt.train#dtrain.cache"``` in filename.
|
For CLI version, simply use ```"../data/agaricus.txt.train#dtrain.cache"``` in filename.
|
||||||
|
|
||||||
Performance Note
|
Performance Note
|
||||||
====
|
----------------
|
||||||
* the parameter ```nthread``` should be set to number of ***real*** cores
|
* the parameter ```nthread``` should be set to number of ***real*** cores
|
||||||
- Most modern CPU offer hyperthreading, which means you can have a 4 core cpu with 8 threads
|
- Most modern CPU offer hyperthreading, which means you can have a 4 core cpu with 8 threads
|
||||||
- Set nthread to be 4 for maximum performance in such case
|
- Set nthread to be 4 for maximum performance in such case
|
||||||
|
|
||||||
Distributed Version
|
Distributed Version
|
||||||
====
|
-------------------
|
||||||
The external memory mode naturally works on distributed version, you can simply set path like
|
The external memory mode naturally works on distributed version, you can simply set path like
|
||||||
```
|
```
|
||||||
data = "hdfs:///path-to-data/#dtrain.cache"
|
data = "hdfs:///path-to-data/#dtrain.cache"
|
||||||
@ -34,8 +34,8 @@ xgboost will cache the data to the local position. When you run on YARN, the cur
|
|||||||
so that you can directly use ```dtrain.cache``` to cache to current folder.
|
so that you can directly use ```dtrain.cache``` to cache to current folder.
|
||||||
|
|
||||||
|
|
||||||
Usage Note:
|
Usage Note
|
||||||
====
|
----------
|
||||||
* This is an experimental version
|
* This is an experimental version
|
||||||
- If you like to try and test it, report results to https://github.com/dmlc/xgboost/issues/244
|
- If you like to try and test it, report results to https://github.com/dmlc/xgboost/issues/244
|
||||||
* Currently only importing from libsvm format is supported
|
* Currently only importing from libsvm format is supported
|
||||||
|
|||||||
@ -1,6 +1,9 @@
|
|||||||
List of Documentations
|
XGBoost Documentation
|
||||||
====
|
=====================
|
||||||
* [Using XGBoost in Python](python.md)
|
|
||||||
|
|
||||||
|
|
||||||
|
* [Using XGBoost in Python](python/python_intro.md)
|
||||||
* [Using XGBoost in R](../R-package/vignettes/xgboostPresentation.Rmd)
|
* [Using XGBoost in R](../R-package/vignettes/xgboostPresentation.Rmd)
|
||||||
* [Learning to use xgboost by example](../demo)
|
* [Learning to use xgboost by example](../demo)
|
||||||
* [External Memory Version](external_memory.md)
|
* [External Memory Version](external_memory.md)
|
||||||
@ -11,14 +14,15 @@ List of Documentations
|
|||||||
- [Notes on Parameter Tuning](param_tuning.md)
|
- [Notes on Parameter Tuning](param_tuning.md)
|
||||||
* Learning about the model: [Introduction to Boosted Trees](http://homes.cs.washington.edu/~tqchen/pdf/BoostedTree.pdf)
|
* Learning about the model: [Introduction to Boosted Trees](http://homes.cs.washington.edu/~tqchen/pdf/BoostedTree.pdf)
|
||||||
|
|
||||||
How to get started
|
|
||||||
====
|
How to Get Started
|
||||||
|
------------------
|
||||||
* Try to read the [binary classification example](../demo/binary_classification) for getting started example
|
* Try to read the [binary classification example](../demo/binary_classification) for getting started example
|
||||||
* Find the language-specific guide above for the language you would like to use
|
* Find the language-specific guide above for the language you would like to use
|
||||||
* [Learning to use xgboost by example](../demo) contains lots of useful examples
|
* [Learning to use xgboost by example](../demo) contains lots of useful examples
|
||||||
|
|
||||||
Highlight Links
|
Example Highlight Links
|
||||||
====
|
-----------------------
|
||||||
This section is about blogposts, presentation and videos discussing how to use xgboost to solve your interesting problem. If you think something belongs to here, send a pull request.
|
This section is about blogposts, presentation and videos discussing how to use xgboost to solve your interesting problem. If you think something belongs to here, send a pull request.
|
||||||
* [Kaggle CrowdFlower winner's solution by Chenglong Chen](https://github.com/ChenglongChen/Kaggle_CrowdFlower)
|
* [Kaggle CrowdFlower winner's solution by Chenglong Chen](https://github.com/ChenglongChen/Kaggle_CrowdFlower)
|
||||||
* [Kaggle Malware Prediction winner's solution](https://github.com/xiaozhouwang/kaggle_Microsoft_Malware)
|
* [Kaggle Malware Prediction winner's solution](https://github.com/xiaozhouwang/kaggle_Microsoft_Malware)
|
||||||
@ -27,8 +31,12 @@ This section is about blogposts, presentation and videos discussing how to use x
|
|||||||
* Video tutorial: [Better Optimization with Repeated Cross Validation and the XGBoost model](https://www.youtube.com/watch?v=Og7CGAfSr_Y)
|
* Video tutorial: [Better Optimization with Repeated Cross Validation and the XGBoost model](https://www.youtube.com/watch?v=Og7CGAfSr_Y)
|
||||||
* [Winning solution of Kaggle Higgs competition: what a single model can do](http://no2147483647.wordpress.com/2014/09/17/winning-solution-of-kaggle-higgs-competition-what-a-single-model-can-do/)
|
* [Winning solution of Kaggle Higgs competition: what a single model can do](http://no2147483647.wordpress.com/2014/09/17/winning-solution-of-kaggle-higgs-competition-what-a-single-model-can-do/)
|
||||||
|
|
||||||
|
API Reference
|
||||||
|
-------------
|
||||||
|
* [Python API Reference](python/python_api.rst)
|
||||||
|
|
||||||
Contribution
|
Contribution
|
||||||
====
|
------------
|
||||||
Contribution of documents and use-cases are welcomed!
|
Contribution of documents and use-cases are welcomed!
|
||||||
* This package use Google C++ style
|
* This package use Google C++ style
|
||||||
* Check tool of codestyle
|
* Check tool of codestyle
|
||||||
@ -1,5 +1,6 @@
|
|||||||
Input Format
|
Text Input Format of DMatrix
|
||||||
====
|
============================
|
||||||
|
|
||||||
## Basic Input Format
|
## Basic Input Format
|
||||||
As we have mentioned, XGBoost takes LibSVM format. For training or predicting, XGBoost takes an instance file with the format as below:
|
As we have mentioned, XGBoost takes LibSVM format. For training or predicting, XGBoost takes an instance file with the format as below:
|
||||||
|
|
||||||
|
|||||||
@ -1,5 +1,5 @@
|
|||||||
XGBoost Parameters
|
XGBoost Parameters
|
||||||
====
|
==================
|
||||||
Before running XGboost, we must set three types of parameters, general parameters, booster parameters and task parameters:
|
Before running XGboost, we must set three types of parameters, general parameters, booster parameters and task parameters:
|
||||||
- General parameters relates to which booster we are using to do boosting, commonly tree or linear model
|
- General parameters relates to which booster we are using to do boosting, commonly tree or linear model
|
||||||
- Booster parameters depends on which booster you have chosen
|
- Booster parameters depends on which booster you have chosen
|
||||||
|
|||||||
36
doc/python/python_api.rst
Normal file
36
doc/python/python_api.rst
Normal file
@ -0,0 +1,36 @@
|
|||||||
|
Python API Reference
|
||||||
|
====================
|
||||||
|
This page gives the Python API reference of xgboost.
|
||||||
|
|
||||||
|
Core Data Structure
|
||||||
|
-------------------
|
||||||
|
.. automodule:: xgboost.core
|
||||||
|
|
||||||
|
.. autoclass:: xgboost.DMatrix
|
||||||
|
:members:
|
||||||
|
:show-inheritance:
|
||||||
|
|
||||||
|
.. autoclass:: xgboost.Booster
|
||||||
|
:members:
|
||||||
|
:show-inheritance:
|
||||||
|
|
||||||
|
|
||||||
|
Learning API
|
||||||
|
------------
|
||||||
|
.. automodule:: xgboost.training
|
||||||
|
|
||||||
|
.. autofunction:: xgboost.train
|
||||||
|
|
||||||
|
.. autofunction:: xgboost.cv
|
||||||
|
|
||||||
|
|
||||||
|
Scikit-Learn API
|
||||||
|
----------------
|
||||||
|
.. automodule:: xgboost.sklearn
|
||||||
|
.. autoclass:: xgboost.XGBRegressor
|
||||||
|
:members:
|
||||||
|
:show-inheritance:
|
||||||
|
.. autoclass:: xgboost.XGBClassifier
|
||||||
|
:members:
|
||||||
|
:show-inheritance:
|
||||||
|
|
||||||
@ -1,5 +1,5 @@
|
|||||||
XGBoost Python Module
|
XGBoost Python Module
|
||||||
====
|
=====================
|
||||||
|
|
||||||
This page will introduce XGBoost Python module, including:
|
This page will introduce XGBoost Python module, including:
|
||||||
* [Building and Import](#building-and-import)
|
* [Building and Import](#building-and-import)
|
||||||
@ -8,6 +8,7 @@ This page will introduce XGBoost Python module, including:
|
|||||||
* [Train Model](#training-model)
|
* [Train Model](#training-model)
|
||||||
* [Early Stopping](#early-stopping)
|
* [Early Stopping](#early-stopping)
|
||||||
* [Prediction](#prediction)
|
* [Prediction](#prediction)
|
||||||
|
* [API Reference](python_api.rst)
|
||||||
|
|
||||||
A [walk through python example](https://github.com/tqchen/xgboost/blob/master/demo/guide-python) for UCI Mushroom dataset is provided.
|
A [walk through python example](https://github.com/tqchen/xgboost/blob/master/demo/guide-python) for UCI Mushroom dataset is provided.
|
||||||
|
|
||||||
50
doc/sphinx_util.py
Normal file
50
doc/sphinx_util.py
Normal file
@ -0,0 +1,50 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
"""Helper hacking utilty function for customization."""
|
||||||
|
import sys
|
||||||
|
import os
|
||||||
|
import subprocess
|
||||||
|
|
||||||
|
# TODO: make less hacky way than this one
# On the ReadTheDocs server, fetch the patched recommonmark checkout and copy
# its parser module into doc/ so the `import parser` below can load it.
# The copy must be named parser.py: a file called just `parser` is not
# importable as a module.
if os.environ.get('READTHEDOCS', None) == 'True':
    subprocess.call('cd ..; rm -rf recommonmark;' +
                    'git clone https://github.com/tqchen/recommonmark;' +
                    'cp recommonmark/recommonmark/parser.py doc/parser.py', shell=True)
|
||||||
|
|
||||||
|
sys.path.insert(0, os.path.abspath('..'))
|
||||||
|
import parser
|
||||||
|
|
||||||
|
class MarkdownParser(parser.CommonMarkParser):
    """CommonMark parser that rewrites link destinations before rendering.

    Relative links to ``.md``/``.rst`` documents are pointed at the
    corresponding ``.html`` page; other relative links are prefixed with
    ``github_doc_root``; absolute URLs and pure in-page anchors are left
    untouched.
    """
    # Prefix prepended to relative links that are not documents.
    # While this is None, remap_url returns every URL unchanged.
    github_doc_root = None
    # Extensions (without the dot) that count as documentation sources.
    doc_suffix = set(['md', 'rst'])

    @staticmethod
    def remap_url(url):
        """Return ``url`` rewritten for the generated documentation.

        Parameters
        ----------
        url : str or None
            Link destination taken from the markdown source.

        Returns
        -------
        str or None
            * ``url`` unchanged when it is None, when ``github_doc_root`` is
              None, when it starts with ``#`` or when its path part contains
              ``://``, unless the document rule below applies;
            * the path with its ``md``/``rst`` extension replaced by
              ``.html`` (any ``#anchor`` kept) for relative document links;
            * ``github_doc_root + url`` for every other relative link.
        """
        if MarkdownParser.github_doc_root is None or url is None:
            return url
        if url.startswith('#'):
            return url

        # Split off an optional "#anchor" so only the path part is inspected.
        path_and_anchor = url.split('#', 1)
        path = path_and_anchor[0]
        is_external = '://' in path
        stem_and_ext = path.rsplit('.', 1)

        if (len(stem_and_ext) == 2
                and stem_and_ext[1] in MarkdownParser.doc_suffix
                and not is_external):
            path_and_anchor[0] = stem_and_ext[0] + '.html'
            return '#'.join(path_and_anchor)
        if is_external:
            return url
        return MarkdownParser.github_doc_root + url

    def reference(self, block):
        """Remap ``block.destination`` and delegate to the parent class."""
        # remap_url is a staticmethod, so it has to be reached through the
        # class; the bare name is not in scope inside a method body.
        block.destination = MarkdownParser.remap_url(block.destination)
        return super(MarkdownParser, self).reference(block)
|
||||||
|
|
||||||
|
# inplace modify the function in recommonmark module to allow link remap
# Keep a handle on the module's original function so the wrapper can call it.
old_ref = parser.reference

def reference(block):
    """Remap ``block.destination`` with ``MarkdownParser.remap_url``, then
    return the result of the original ``parser.reference`` for ``block``."""
    block.destination = MarkdownParser.remap_url(block.destination)
    return old_ref(block)

# Install the wrapper in place of the module-level function.
parser.reference = reference
|
||||||
@ -10,3 +10,7 @@ from .training import train, cv
|
|||||||
from .sklearn import XGBModel, XGBClassifier, XGBRegressor
|
from .sklearn import XGBModel, XGBClassifier, XGBRegressor
|
||||||
|
|
||||||
__version__ = '0.4'
|
__version__ = '0.4'
|
||||||
|
|
||||||
|
__all__ = ['DMatrix', 'Booster',
|
||||||
|
'train', 'cv',
|
||||||
|
'XGBModel', 'XGBClassifier', 'XGBRegressor']
|
||||||
|
|||||||
@ -50,20 +50,24 @@ def find_lib_path():
|
|||||||
else:
|
else:
|
||||||
dll_path = [os.path.join(p, 'libxgboostwrapper.so') for p in dll_path]
|
dll_path = [os.path.join(p, 'libxgboostwrapper.so') for p in dll_path]
|
||||||
lib_path = [p for p in dll_path if os.path.exists(p) and os.path.isfile(p)]
|
lib_path = [p for p in dll_path if os.path.exists(p) and os.path.isfile(p)]
|
||||||
if len(lib_path) == 0:
|
if len(lib_path) == 0 and not os.environ.get('XGBOOST_BUILD_DOC', False):
|
||||||
raise XGBoostLibraryNotFound(
|
raise XGBoostLibraryNotFound(
|
||||||
'Cannot find XGBoost Libarary in the candicate path %s,' +
|
'Cannot find XGBoost Libarary in the candicate path, ' +
|
||||||
'Did you run build.sh in root oath?' % str(dll_path))
|
'did you run build.sh in root path?\n'
|
||||||
|
'List of candidates:\n' + ('\n'.join(dll_path)))
|
||||||
return lib_path
|
return lib_path
|
||||||
|
|
||||||
|
|
||||||
def _load_lib():
|
def _load_lib():
|
||||||
"""Load xgboost Library."""
|
"""Load xgboost Library."""
|
||||||
lib_path = find_lib_path()
|
lib_path = find_lib_path()
|
||||||
|
if len(lib_path) == 0:
|
||||||
|
return None
|
||||||
lib = ctypes.cdll.LoadLibrary(lib_path[0])
|
lib = ctypes.cdll.LoadLibrary(lib_path[0])
|
||||||
lib.XGBGetLastError.restype = ctypes.c_char_p
|
lib.XGBGetLastError.restype = ctypes.c_char_p
|
||||||
|
|
||||||
return lib
|
return lib
|
||||||
|
|
||||||
|
|
||||||
# load the XGBoost library globally
|
# load the XGBoost library globally
|
||||||
_LIB = _load_lib()
|
_LIB = _load_lib()
|
||||||
|
|
||||||
@ -119,6 +123,7 @@ class DMatrix(object):
|
|||||||
|
|
||||||
DMatrix is a internal data structure that used by XGBoost
|
DMatrix is a internal data structure that used by XGBoost
|
||||||
which is optimized for both memory efficiency and training speed.
|
which is optimized for both memory efficiency and training speed.
|
||||||
|
You can construct DMatrix from numpy.arrays
|
||||||
"""
|
"""
|
||||||
def __init__(self, data, label=None, missing=0.0, weight=None, silent=False):
|
def __init__(self, data, label=None, missing=0.0, weight=None, silent=False):
|
||||||
"""
|
"""
|
||||||
@ -127,15 +132,16 @@ class DMatrix(object):
|
|||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
data : string/numpy array/scipy.sparse
|
data : string/numpy array/scipy.sparse
|
||||||
Data source, string type is the path of svmlight format txt file,
|
Data source of DMatrix.
|
||||||
xgb buffer or path to cache_file
|
When data is string type, it represents the path to a libsvm format txt file,
|
||||||
label : list or numpy 1-D array (optional)
|
or binary file that xgboost can read from.
|
||||||
|
label : list or numpy 1-D array, optional
|
||||||
Label of the training data.
|
Label of the training data.
|
||||||
missing : float
|
missing : float, optional
|
||||||
Value in the data which needs to be present as a missing value.
|
Value in the data which needs to be present as a missing value.
|
||||||
weight : list or numpy 1-D array (optional)
|
weight : list or numpy 1-D array, optional
|
||||||
Weight for each instance.
|
Weight for each instance.
|
||||||
silent: boolean
|
silent : boolean, optional
|
||||||
Whether print messages during construction
|
Whether print messages during construction
|
||||||
"""
|
"""
|
||||||
# force into void_p, mac need to pass things in as void_p
|
# force into void_p, mac need to pass things in as void_p
|
||||||
@ -469,13 +475,22 @@ class Booster(object):
|
|||||||
"""Copy the booster object.
|
"""Copy the booster object.
|
||||||
|
|
||||||
Returns
|
Returns
|
||||||
--------
|
-------
|
||||||
|
booster: `Booster`
|
||||||
a copied booster model
|
a copied booster model
|
||||||
"""
|
"""
|
||||||
return self.__copy__()
|
return self.__copy__()
|
||||||
|
|
||||||
def set_param(self, params, value=None):
|
def set_param(self, params, value=None):
|
||||||
"""Set parameters into the DMatrix."""
|
"""Set parameters into the Booster.
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
params: dict/list/str
|
||||||
|
list of key,value pairs, dict of key to value or simply str key
|
||||||
|
value: optional
|
||||||
|
value of the specified parameter, when params is str key
|
||||||
|
"""
|
||||||
if isinstance(params, collections.Mapping):
|
if isinstance(params, collections.Mapping):
|
||||||
params = params.items()
|
params = params.items()
|
||||||
elif isinstance(params, STRING_TYPES) and value is not None:
|
elif isinstance(params, STRING_TYPES) and value is not None:
|
||||||
@ -485,7 +500,7 @@ class Booster(object):
|
|||||||
|
|
||||||
def update(self, dtrain, iteration, fobj=None):
|
def update(self, dtrain, iteration, fobj=None):
|
||||||
"""
|
"""
|
||||||
Update (one iteration).
|
Update for one iteration, with objective function calculated internally.
|
||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
@ -507,7 +522,7 @@ class Booster(object):
|
|||||||
|
|
||||||
def boost(self, dtrain, grad, hess):
|
def boost(self, dtrain, grad, hess):
|
||||||
"""
|
"""
|
||||||
Update.
|
Boost the booster for one iteration, with customized gradient statistics.
|
||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
@ -542,7 +557,8 @@ class Booster(object):
|
|||||||
|
|
||||||
Returns
|
Returns
|
||||||
-------
|
-------
|
||||||
evaluation result
|
result: str
|
||||||
|
Evaluation result string.
|
||||||
"""
|
"""
|
||||||
if feval is None:
|
if feval is None:
|
||||||
for d in evals:
|
for d in evals:
|
||||||
@ -567,18 +583,21 @@ class Booster(object):
|
|||||||
def eval(self, data, name='eval', iteration=0):
|
def eval(self, data, name='eval', iteration=0):
|
||||||
"""Evaluate the model on mat.
|
"""Evaluate the model on mat.
|
||||||
|
|
||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
---------
|
----------
|
||||||
data : DMatrix
|
data : DMatrix
|
||||||
The dmatrix storing the input.
|
The dmatrix storing the input.
|
||||||
|
|
||||||
name : str (default = 'eval')
|
name : str, optional
|
||||||
The name of the dataset
|
The name of the dataset.
|
||||||
|
|
||||||
|
iteration : int, optional
|
||||||
|
The current iteration number.
|
||||||
|
|
||||||
iteration : int (default = 0)
|
Returns
|
||||||
The current iteration number
|
-------
|
||||||
|
result: str
|
||||||
|
Evaluation result string.
|
||||||
"""
|
"""
|
||||||
return self.eval_set([(data, name)], iteration)
|
return self.eval_set([(data, name)], iteration)
|
||||||
|
|
||||||
|
|||||||
@ -206,9 +206,9 @@ class XGBModel(XGBModelBase):
|
|||||||
|
|
||||||
class XGBClassifier(XGBModel, XGBClassifierBase):
|
class XGBClassifier(XGBModel, XGBClassifierBase):
|
||||||
# pylint: disable=missing-docstring,too-many-arguments,invalid-name
|
# pylint: disable=missing-docstring,too-many-arguments,invalid-name
|
||||||
__doc__ = """
|
__doc__ = """Implementation of the scikit-learn API for XGBoost classification.
|
||||||
Implementation of the scikit-learn API for XGBoost classification
|
|
||||||
""" + "\n".join(XGBModel.__doc__.split('\n')[2:])
|
""" + '\n'.join(XGBModel.__doc__.split('\n')[2:])
|
||||||
|
|
||||||
def __init__(self, max_depth=3, learning_rate=0.1,
|
def __init__(self, max_depth=3, learning_rate=0.1,
|
||||||
n_estimators=100, silent=True,
|
n_estimators=100, silent=True,
|
||||||
@ -335,7 +335,5 @@ class XGBClassifier(XGBModel, XGBClassifierBase):
|
|||||||
|
|
||||||
class XGBRegressor(XGBModel, XGBRegressorBase):
|
class XGBRegressor(XGBModel, XGBRegressorBase):
|
||||||
# pylint: disable=missing-docstring
|
# pylint: disable=missing-docstring
|
||||||
__doc__ = """
|
__doc__ = """Implementation of the scikit-learn API for XGBoost regression.
|
||||||
Implementation of the scikit-learn API for XGBoost regression
|
""" + '\n'.join(XGBModel.__doc__.split('\n')[2:])
|
||||||
""" + "\n".join(XGBModel.__doc__.split('\n')[2:])
|
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user