From c024c42dce0a2c4d01881751f0e3dc7c7691ebf8 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Tue, 23 Nov 2021 23:24:52 +0800 Subject: [PATCH] Modernize XGBoost Python document. (#7468) * Use sphinx gallery to integrate examples. * Remove mock objects. * Add dask doc inventory. --- .gitignore | 5 +--- demo/guide-python/README.md | 19 ------------ demo/guide-python/README.rst | 5 ++++ demo/guide-python/basic_walkthrough.py | 4 +++ demo/guide-python/boost_from_prediction.py | 4 +++ demo/guide-python/callbacks.py | 3 +- demo/guide-python/cat_in_the_dat.py | 8 +++-- demo/guide-python/categorical.py | 12 +++++--- demo/guide-python/continuation.py | 3 +- demo/guide-python/cross_validation.py | 4 +++ demo/guide-python/custom_rmsle.py | 29 ++++++++++--------- demo/guide-python/custom_softmax.py | 14 +++++---- demo/guide-python/evals_result.py | 7 +++-- demo/guide-python/external_memory.py | 9 ++++-- demo/guide-python/feature_weights.py | 4 ++- demo/guide-python/gamma_regression.py | 4 +++ demo/guide-python/generalized_linear_model.py | 4 +++ demo/guide-python/predict_first_ntree.py | 4 +++ demo/guide-python/predict_leaf_indices.py | 4 +++ demo/guide-python/quantile_data_iterator.py | 4 ++- demo/guide-python/sklearn_evals_result.py | 7 +++-- demo/guide-python/sklearn_examples.py | 3 ++ demo/guide-python/sklearn_parallel.py | 4 +++ demo/guide-python/update_process.py | 8 +++-- doc/conf.py | 25 +++++++++------- doc/python/.gitignore | 1 + doc/python/index.rst | 2 +- doc/python/python_intro.rst | 2 +- doc/requirements.txt | 1 + doc/tutorials/categorical.rst | 11 +++---- 30 files changed, 130 insertions(+), 84 deletions(-) delete mode 100644 demo/guide-python/README.md create mode 100644 demo/guide-python/README.rst create mode 100644 doc/python/.gitignore diff --git a/.gitignore b/.gitignore index a099b3a27..e847342b1 100644 --- a/.gitignore +++ b/.gitignore @@ -130,7 +130,4 @@ credentials.csv # Visual Studio code + extensions .vscode .metals -.bloop - -# Demo -demo \ No newline at end of file +.bloop \ No newline at end of file diff --git a/demo/guide-python/README.md b/demo/guide-python/README.md deleted file mode 100644 index de6e8e024..000000000 --- a/demo/guide-python/README.md +++ /dev/null @@ -1,19 +0,0 @@ -XGBoost Python Feature Walkthrough -================================== -* [Basic walkthrough of wrappers](basic_walkthrough.py) -* [Re-implement RMSLE as customized metric and objective](custom_rmsle.py) -* [Re-Implement `multi:softmax` objective as customized objective](custom_softmax.py) -* [Boosting from existing prediction](boost_from_prediction.py) -* [Predicting using first n trees](predict_first_ntree.py) -* [Generalized Linear Model](generalized_linear_model.py) -* [Cross validation](cross_validation.py) -* [Predicting leaf indices](predict_leaf_indices.py) -* [Sklearn Wrapper](sklearn_examples.py) -* [Sklearn Parallel](sklearn_parallel.py) -* [Sklearn access evals result](sklearn_evals_result.py) -* [Access evals result](evals_result.py) -* [External Memory](external_memory.py) -* [Training continuation](continuation.py) -* [Feature weights for column sampling](feature_weights.py) -* [Basic Categorical data support](categorical.py) -* [Compare builtin categorical data support with one-hot encoding](cat_in_the_dat.py) \ No newline at end of file diff --git a/demo/guide-python/README.rst b/demo/guide-python/README.rst new file mode 100644 index 000000000..92a1cbf33 --- /dev/null +++ b/demo/guide-python/README.rst @@ -0,0 +1,5 @@ +XGBoost Python Feature Walkthrough 
+================================== + + +This is a collection of examples for using the XGBoost Python package. diff --git a/demo/guide-python/basic_walkthrough.py b/demo/guide-python/basic_walkthrough.py index c977a4f48..e35a1e27c 100644 --- a/demo/guide-python/basic_walkthrough.py +++ b/demo/guide-python/basic_walkthrough.py @@ -1,3 +1,7 @@ +""" +Getting started with XGBoost +============================ +""" import numpy as np import scipy.sparse import pickle diff --git a/demo/guide-python/boost_from_prediction.py b/demo/guide-python/boost_from_prediction.py index 3936f4f26..0be021725 100644 --- a/demo/guide-python/boost_from_prediction.py +++ b/demo/guide-python/boost_from_prediction.py @@ -1,3 +1,7 @@ +""" +Demo for boosting from prediction +================================= +""" import os import xgboost as xgb diff --git a/demo/guide-python/callbacks.py b/demo/guide-python/callbacks.py index d4362eeba..b2d1afb74 100644 --- a/demo/guide-python/callbacks.py +++ b/demo/guide-python/callbacks.py @@ -1,5 +1,6 @@ ''' -Demo for using and defining callback functions. +Demo for using and defining callback functions +============================================== .. versionadded:: 1.3.0 ''' diff --git a/demo/guide-python/cat_in_the_dat.py b/demo/guide-python/cat_in_the_dat.py index e502aab5a..35840b44f 100644 --- a/demo/guide-python/cat_in_the_dat.py +++ b/demo/guide-python/cat_in_the_dat.py @@ -1,4 +1,8 @@ -"""A simple demo for categorical data support using dataset from Kaggle categorical data +""" +Train XGBoost with cat_in_the_dat dataset +========================================= + +A simple demo for categorical data support using dataset from Kaggle categorical data tutorial. The excellent tutorial is at: @@ -8,7 +12,7 @@ And the data can be found at: https://www.kaggle.com/shahules/an-overview-of-encoding-techniques/data Also, see the tutorial for using XGBoost with categorical data: -https://xgboost.readthedocs.io/en/latest/tutorials/categorical.html +:doc:`/tutorials/categorical`. .. versionadded 1.6.0 diff --git a/demo/guide-python/categorical.py b/demo/guide-python/categorical.py index 9476c1ed6..7f358fcbb 100644 --- a/demo/guide-python/categorical.py +++ b/demo/guide-python/categorical.py @@ -1,12 +1,16 @@ -"""Experimental support for categorical data. After 1.5 XGBoost `gpu_hist` tree method -has experimental support for one-hot encoding based tree split. +""" +Getting started with categorical data +===================================== + +Experimental support for categorical data. After 1.5 XGBoost `gpu_hist` tree method has +experimental support for one-hot encoding based tree split. In before, users need to run an encoder themselves before passing the data into XGBoost, which creates a sparse matrix and potentially increase memory usage. This demo showcases the experimental categorical data support, more advanced features are planned. -Also, see the tutorial for using XGBoost with categorical data: -https://xgboost.readthedocs.io/en/latest/tutorials/categorical.html +Also, see :doc:`the tutorial ` for using XGBoost with categorical data + .. versionadded:: 1.5.0 diff --git a/demo/guide-python/continuation.py b/demo/guide-python/continuation.py index 7c6440eed..22fbfc3f7 100644 --- a/demo/guide-python/continuation.py +++ b/demo/guide-python/continuation.py @@ -1,5 +1,6 @@ """ -Demo for training continuation. 
+Demo for training continuation +============================== """ from sklearn.datasets import load_breast_cancer diff --git a/demo/guide-python/cross_validation.py b/demo/guide-python/cross_validation.py index f81a138a1..2ca3f0201 100644 --- a/demo/guide-python/cross_validation.py +++ b/demo/guide-python/cross_validation.py @@ -1,3 +1,7 @@ +""" +Demo for using cross validation +=============================== +""" import os import numpy as np import xgboost as xgb diff --git a/demo/guide-python/custom_rmsle.py b/demo/guide-python/custom_rmsle.py index 0f8d5fcb2..66fbd83a0 100644 --- a/demo/guide-python/custom_rmsle.py +++ b/demo/guide-python/custom_rmsle.py @@ -1,16 +1,19 @@ -'''Demo for defining customized metric and objective. Notice that for -simplicity reason weight is not used in following example. In this -script, we implement the Squared Log Error (SLE) objective and RMSLE metric as customized -functions, then compare it with native implementation in XGBoost. +""" +Demo for defining a custom regression objective and metric +========================================================== -See doc/tutorials/custom_metric_obj.rst for a step by step -walkthrough, with other details. +Demo for defining customized metric and objective. Notice that for simplicity reason +weight is not used in following example. In this script, we implement the Squared Log +Error (SLE) objective and RMSLE metric as customized functions, then compare it with +native implementation in XGBoost. -The `SLE` objective reduces impact of outliers in training dataset, -hence here we also compare its performance with standard squared -error. +See doc/tutorials/custom_metric_obj.rst for a step by step walkthrough, with other +details. -''' +The `SLE` objective reduces impact of outliers in training dataset, hence here we also +compare its performance with standard squared error. + +""" import numpy as np import xgboost as xgb from typing import Tuple, Dict, List @@ -171,9 +174,6 @@ def plot_history(rmse_evals, rmsle_evals, py_rmsle_evals): ax2.plot(x, py_rmsle_evals['dtest']['PyRMSLE'], label='test-PyRMSLE') ax2.legend() - plt.show() - plt.close() - def main(args): dtrain, dtest = generate_data() @@ -183,9 +183,10 @@ def main(args): if args.plot != 0: plot_history(rmse_evals, rmsle_evals, py_rmsle_evals) + plt.show() -if __name__ == '__main__': +if __name__ == "__main__": parser = argparse.ArgumentParser( description='Arguments for custom RMSLE objective function demo.') parser.add_argument( diff --git a/demo/guide-python/custom_softmax.py b/demo/guide-python/custom_softmax.py index bb53d6e5c..e7064f463 100644 --- a/demo/guide-python/custom_softmax.py +++ b/demo/guide-python/custom_softmax.py @@ -1,10 +1,12 @@ -'''Demo for creating customized multi-class objective function. This demo is -only applicable after (excluding) XGBoost 1.0.0, as before this version XGBoost -returns transformed prediction for multi-class objective function. More -details in comments. +''' +Demo for creating customized multi-class objective function +=========================================================== -See https://xgboost.readthedocs.io/en/latest/tutorials/custom_metric_obj.html for detailed -tutorial and notes. +This demo is only applicable after (excluding) XGBoost 1.0.0, as before this version +XGBoost returns transformed prediction for multi-class objective function. More details +in comments. + +See :doc:`/tutorials/custom_metric_obj` for detailed tutorial and notes. 
''' diff --git a/demo/guide-python/evals_result.py b/demo/guide-python/evals_result.py index f9eeb23a6..bb4f44a9f 100644 --- a/demo/guide-python/evals_result.py +++ b/demo/guide-python/evals_result.py @@ -1,6 +1,7 @@ -## -# This script demonstrate how to access the eval metrics in xgboost -## +""" +This script demonstrate how to access the eval metrics +====================================================== +""" import os import xgboost as xgb diff --git a/demo/guide-python/external_memory.py b/demo/guide-python/external_memory.py index 5cf72ba82..3e864a53e 100644 --- a/demo/guide-python/external_memory.py +++ b/demo/guide-python/external_memory.py @@ -1,6 +1,9 @@ -"""Experimental support for external memory. This is similar to the one in -`quantile_data_iterator.py`, but for external memory instead of Quantile DMatrix. The -feature is not ready for production use yet. +""" +Experimental support for external memory +======================================== + +This is similar to the one in `quantile_data_iterator.py`, but for external memory +instead of Quantile DMatrix. The feature is not ready for production use yet. .. versionadded:: 1.5.0 diff --git a/demo/guide-python/feature_weights.py b/demo/guide-python/feature_weights.py index 83b62c002..f0b4907aa 100644 --- a/demo/guide-python/feature_weights.py +++ b/demo/guide-python/feature_weights.py @@ -1,4 +1,6 @@ -'''Using feature weight to change column sampling. +''' +Demo for using feature weight to change column sampling +======================================================= .. versionadded:: 1.3.0 ''' diff --git a/demo/guide-python/gamma_regression.py b/demo/guide-python/gamma_regression.py index 62bcf37cf..28b71a5d0 100644 --- a/demo/guide-python/gamma_regression.py +++ b/demo/guide-python/gamma_regression.py @@ -1,3 +1,7 @@ +""" +Demo for gamma regression +========================= +""" import xgboost as xgb import numpy as np diff --git a/demo/guide-python/generalized_linear_model.py b/demo/guide-python/generalized_linear_model.py index f8d4efc79..f409fb960 100644 --- a/demo/guide-python/generalized_linear_model.py +++ b/demo/guide-python/generalized_linear_model.py @@ -1,3 +1,7 @@ +""" +Demo for GLM +============ +""" import os import xgboost as xgb ## diff --git a/demo/guide-python/predict_first_ntree.py b/demo/guide-python/predict_first_ntree.py index a663e672a..b56de0200 100644 --- a/demo/guide-python/predict_first_ntree.py +++ b/demo/guide-python/predict_first_ntree.py @@ -1,3 +1,7 @@ +""" +Demo for prediction using number of trees +========================================= +""" import os import numpy as np import xgboost as xgb diff --git a/demo/guide-python/predict_leaf_indices.py b/demo/guide-python/predict_leaf_indices.py index 96608d762..2bfc37e3a 100644 --- a/demo/guide-python/predict_leaf_indices.py +++ b/demo/guide-python/predict_leaf_indices.py @@ -1,3 +1,7 @@ +""" +Demo for obtaining leaf index +============================= +""" import os import xgboost as xgb diff --git a/demo/guide-python/quantile_data_iterator.py b/demo/guide-python/quantile_data_iterator.py index 97cbf388f..292cd127e 100644 --- a/demo/guide-python/quantile_data_iterator.py +++ b/demo/guide-python/quantile_data_iterator.py @@ -1,4 +1,6 @@ -'''A demo for defining data iterator. +''' +Demo for using data iterator with Quantile DMatrix +================================================== .. 
versionadded:: 1.2.0 diff --git a/demo/guide-python/sklearn_evals_result.py b/demo/guide-python/sklearn_evals_result.py index 410642135..c20328adb 100644 --- a/demo/guide-python/sklearn_evals_result.py +++ b/demo/guide-python/sklearn_evals_result.py @@ -1,6 +1,7 @@ -## -# This script demonstrate how to access the xgboost eval metrics by using sklearn -## +""" +Demo for accessing the xgboost eval metrics by using sklearn interface +====================================================================== +""" import xgboost as xgb import numpy as np diff --git a/demo/guide-python/sklearn_examples.py b/demo/guide-python/sklearn_examples.py index c4ec64d8c..b5de652a6 100644 --- a/demo/guide-python/sklearn_examples.py +++ b/demo/guide-python/sklearn_examples.py @@ -1,4 +1,7 @@ ''' +Collection of examples for using sklearn interface +================================================== + Created on 1 Apr 2015 @author: Jamie Hall diff --git a/demo/guide-python/sklearn_parallel.py b/demo/guide-python/sklearn_parallel.py index f5ea0ac7e..dd472da67 100644 --- a/demo/guide-python/sklearn_parallel.py +++ b/demo/guide-python/sklearn_parallel.py @@ -1,3 +1,7 @@ +""" +Demo for using xgboost with sklearn +=================================== +""" from sklearn.model_selection import GridSearchCV from sklearn.datasets import load_boston import xgboost as xgb diff --git a/demo/guide-python/update_process.py b/demo/guide-python/update_process.py index 53206f9c2..8ed03fd78 100644 --- a/demo/guide-python/update_process.py +++ b/demo/guide-python/update_process.py @@ -1,5 +1,9 @@ -"""Demo for using `process_type` with `prune` and `refresh`. Modifying existing trees is -not a well established use for XGBoost, so feel free to experiment. +""" +Demo for using `process_type` with `prune` and `refresh` +======================================================== + +Modifying existing trees is not a well established use for XGBoost, so feel free to +experiment. """ diff --git a/doc/conf.py b/doc/conf.py index a40e45570..a43d384fd 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -62,12 +62,6 @@ libpath = os.path.join(curr_path, '../python-package/') sys.path.insert(0, libpath) sys.path.insert(0, curr_path) -# -- mock out modules -import mock # NOQA -MOCK_MODULES = ['scipy', 'scipy.sparse', 'sklearn', 'pandas'] -for mod_name in MOCK_MODULES: - sys.modules[mod_name] = mock.Mock() - # -- General configuration ------------------------------------------------ # General information about the project. 
@@ -90,10 +84,17 @@ extensions = [ 'sphinx.ext.napoleon', 'sphinx.ext.mathjax', 'sphinx.ext.intersphinx', + "sphinx_gallery.gen_gallery", 'breathe', 'recommonmark' ] +sphinx_gallery_conf = { + "examples_dirs": "../demo/guide-python", # path to your example scripts + "gallery_dirs": "python/examples", # path to where to save gallery generated output + "matplotlib_animations": True, +} + autodoc_typehints = "description" graphviz_output_format = 'png' @@ -201,11 +202,13 @@ latex_documents = [ ] intersphinx_mapping = { - 'python': ('https://docs.python.org/3.6', None), - 'numpy': ('http://docs.scipy.org/doc/numpy/', None), - 'scipy': ('http://docs.scipy.org/doc/scipy/reference/', None), - 'pandas': ('http://pandas-docs.github.io/pandas-docs-travis/', None), - 'sklearn': ('http://scikit-learn.org/stable', None) + "python": ("https://docs.python.org/3.6", None), + "numpy": ("http://docs.scipy.org/doc/numpy/", None), + "scipy": ("http://docs.scipy.org/doc/scipy/reference/", None), + "pandas": ("http://pandas-docs.github.io/pandas-docs-travis/", None), + "sklearn": ("http://scikit-learn.org/stable", None), + "dask": ("https://docs.dask.org/en/stable/", None), + "distributed": ("https://distributed.dask.org/en/stable/", None), } diff --git a/doc/python/.gitignore b/doc/python/.gitignore new file mode 100644 index 000000000..b7265688a --- /dev/null +++ b/doc/python/.gitignore @@ -0,0 +1 @@ +examples \ No newline at end of file diff --git a/doc/python/index.rst b/doc/python/index.rst index 52045cdbc..cf986bff4 100644 --- a/doc/python/index.rst +++ b/doc/python/index.rst @@ -13,4 +13,4 @@ Contents python_api callbacks model - Python examples + examples/index diff --git a/doc/python/python_intro.rst b/doc/python/python_intro.rst index a93439925..e31f705ea 100644 --- a/doc/python/python_intro.rst +++ b/doc/python/python_intro.rst @@ -5,7 +5,7 @@ This document gives a basic walkthrough of the xgboost package for Python. **List of other Helpful Links** -* `Python walkthrough code collections `_ +* :doc:`/python/examples/index` * :doc:`Python API Reference ` Install XGBoost diff --git a/doc/requirements.txt b/doc/requirements.txt index 3a386610a..8751da518 100644 --- a/doc/requirements.txt +++ b/doc/requirements.txt @@ -8,3 +8,4 @@ graphviz numpy recommonmark xgboost_ray +sphinx-gallery \ No newline at end of file diff --git a/doc/tutorials/categorical.rst b/doc/tutorials/categorical.rst index 38ef9c027..a56b94647 100644 --- a/doc/tutorials/categorical.rst +++ b/doc/tutorials/categorical.rst @@ -57,13 +57,10 @@ can plot the model and calculate the global feature importance: The ``scikit-learn`` interface from dask is similar to single node version. The basic idea is create dataframe with category feature type, and tell XGBoost to use ``gpu_hist`` -with parameter ``enable_categorical``. See `this demo -`__ for a -worked example of using categorical data with ``scikit-learn`` interface. A comparison -between using one-hot encoded data and XGBoost's categorical data support can be found -`here -`__. - +with parameter ``enable_categorical``. See :ref:`sphx_glr_python_examples_categorical.py` +for a worked example of using categorical data with ``scikit-learn`` interface. A +comparison between using one-hot encoded data and XGBoost's categorical data support can +be found :ref:`sphx_glr_python_examples_cat_in_the_dat.py`. **********************
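
With the ``sphinx_gallery_conf`` added in ``doc/conf.py`` above, every script under ``demo/guide-python`` is rendered as a page under ``python/examples`` in the built documentation, which is why each demo in this patch now begins with a docstring whose first line is a title underlined with ``=``. The sketch below (not part of this commit) shows the minimal shape such a demo takes; the file contents, data, and parameters are invented for illustration, and only the docstring-title convention and the ``examples_dirs``/``gallery_dirs`` paths come from the patch itself::

    """
    Minimal sphinx-gallery style demo
    =================================

    The leading docstring becomes the text at the top of the generated page; the
    title and its ``=`` underline are required by sphinx-gallery.
    """
    import numpy as np
    import xgboost as xgb

    # %%
    # Comment blocks introduced by ``# %%`` are rendered as prose between code
    # cells in the generated page.
    rng = np.random.default_rng(1994)
    X = rng.normal(size=(256, 8))
    y = X.sum(axis=1) + rng.normal(scale=0.1, size=256)

    dtrain = xgb.DMatrix(X, label=y)
    booster = xgb.train({"tree_method": "hist"}, dtrain, num_boost_round=16)
    print(booster.eval(dtrain))

A new demo dropped into ``demo/guide-python`` with such a header is picked up automatically by the gallery configuration and cross-referenced from ``doc/python/index.rst`` via ``examples/index``, as the changes to ``doc/python`` in this patch show.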