Bump version to 1.7.6 (#9305)

[backport] Optimize prediction with QuantileDMatrix. (#9096) (#9303)
[backport] Fix monotone constraints on CPU. (#9122) (#9287)
2023-06-16 03:33:16 +08:00 · 2023-06-15 23:32:03 +08:00 · 2023-06-11 17:51:25 +08:00 · 2023-06-11 13:22:23 +08:00 · 2023-06-11 13:18:23 +08:00 · 2023-06-11 11:08:45 +08:00
33 changed files with 284 additions and 233 deletions
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -148,66 +148,13 @@ jobs:
      run: |
        LINT_LANG=cpp make lint

-  doxygen:
-    runs-on: ubuntu-latest
-    name: Generate C/C++ API doc using Doxygen
-    steps:
-    - uses: actions/checkout@v2
-      with:
-        submodules: 'true'
-    - uses: actions/setup-python@v2
-      with:
-        python-version: "3.8"
-        architecture: 'x64'
-    - name: Install system packages
-      run: |
-        sudo apt-get install -y --no-install-recommends doxygen graphviz ninja-build
-        python -m pip install wheel setuptools
-        python -m pip install awscli
-    - name: Run Doxygen
-      run: |
-        mkdir build
-        cd build
-        cmake .. -DBUILD_C_DOC=ON -GNinja
-        ninja -v doc_doxygen
-    - name: Extract branch name
-      shell: bash
-      run: echo "##[set-output name=branch;]$(echo ${GITHUB_REF#refs/heads/})"
-      id: extract_branch
-      if: github.ref == 'refs/heads/master' || contains(github.ref, 'refs/heads/release_')
-    - name: Publish
-      run: |
-        cd build/
-        tar cvjf ${{ steps.extract_branch.outputs.branch }}.tar.bz2 doc_doxygen/
-        python -m awscli s3 cp ./${{ steps.extract_branch.outputs.branch }}.tar.bz2 s3://xgboost-docs/doxygen/ --acl public-read
-      if: github.ref == 'refs/heads/master' || contains(github.ref, 'refs/heads/release_')
-      env:
-        AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID_IAM_S3_UPLOADER }}
-        AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY_IAM_S3_UPLOADER }}
-
-  sphinx:
-    runs-on: ubuntu-latest
-    name: Build docs using Sphinx
-    steps:
-    - uses: actions/checkout@v2
-      with:
-        submodules: 'true'
-    - uses: actions/setup-python@v2
-      with:
-        python-version: "3.8"
-        architecture: 'x64'
-    - name: Install system packages
-      run: |
-        sudo apt-get install -y --no-install-recommends graphviz
-        python -m pip install wheel setuptools
-        python -m pip install -r doc/requirements.txt
-    - name: Extract branch name
-      shell: bash
-      run: echo "##[set-output name=branch;]$(echo ${GITHUB_REF#refs/heads/})"
-      id: extract_branch
-      if: github.ref == 'refs/heads/master' || contains(github.ref, 'refs/heads/release_')
-    - name: Run Sphinx
-      run: |
-        make -C doc html
-      env:
-        SPHINX_GIT_BRANCH: ${{ steps.extract_branch.outputs.branch }}
+        python3 dmlc-core/scripts/lint.py --exclude_path \
+            python-package/xgboost/dmlc-core \
+            python-package/xgboost/include \
+            python-package/xgboost/lib \
+            python-package/xgboost/rabit \
+            python-package/xgboost/src \
+            --pylint-rc python-package/.pylintrc \
+            xgboost \
+            cpp \
+            include src python-package
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,5 +1,5 @@
 cmake_minimum_required(VERSION 3.18 FATAL_ERROR)
-project(xgboost LANGUAGES CXX C VERSION 1.7.5)
+project(xgboost LANGUAGES CXX C VERSION 1.7.6)
 include(cmake/Utils.cmake)
 list(APPEND CMAKE_MODULE_PATH "${xgboost_SOURCE_DIR}/cmake/modules")
 cmake_policy(SET CMP0022 NEW)
--- a/R-package/DESCRIPTION
+++ b/R-package/DESCRIPTION
@@ -1,8 +1,8 @@
 Package: xgboost
 Type: Package
 Title: Extreme Gradient Boosting
-Version: 1.7.5.1
-Date: 2023-03-29
+Version: 1.7.6.1
+Date: 2023-06-16
 Authors@R: c(
  person("Tianqi", "Chen", role = c("aut"),
         email = "tianqi.tchen@gmail.com"),
--- a/R-package/configure
+++ b/R-package/configure
@@ -1,6 +1,6 @@
 #! /bin/sh
 # Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.71 for xgboost 1.7.5.
+# Generated by GNU Autoconf 2.71 for xgboost 1.7.6.
 #
 #
 # Copyright (C) 1992-1996, 1998-2017, 2020-2021 Free Software Foundation,
@@ -607,8 +607,8 @@ MAKEFLAGS=
 # Identity of this package.
 PACKAGE_NAME='xgboost'
 PACKAGE_TARNAME='xgboost'
-PACKAGE_VERSION='1.7.5'
-PACKAGE_STRING='xgboost 1.7.5'
+PACKAGE_VERSION='1.7.6'
+PACKAGE_STRING='xgboost 1.7.6'
 PACKAGE_BUGREPORT=''
 PACKAGE_URL=''

@@ -1225,7 +1225,7 @@ if test "$ac_init_help" = "long"; then
  # Omit some internal or obsolete options to make the list less imposing.
  # This message is too long to be a string in the A/UX 3.1 sh.
  cat <<_ACEOF
-\`configure' configures xgboost 1.7.5 to adapt to many kinds of systems.
+\`configure' configures xgboost 1.7.6 to adapt to many kinds of systems.

 Usage: $0 [OPTION]... [VAR=VALUE]...

@@ -1287,7 +1287,7 @@ fi

 if test -n "$ac_init_help"; then
  case $ac_init_help in
-     short | recursive ) echo "Configuration of xgboost 1.7.5:";;
+     short | recursive ) echo "Configuration of xgboost 1.7.6:";;
   esac
  cat <<\_ACEOF

@@ -1367,7 +1367,7 @@ fi
 test -n "$ac_init_help" && exit $ac_status
 if $ac_init_version; then
  cat <<\_ACEOF
-xgboost configure 1.7.5
+xgboost configure 1.7.6
 generated by GNU Autoconf 2.71

 Copyright (C) 2021 Free Software Foundation, Inc.
@@ -1533,7 +1533,7 @@ cat >config.log <<_ACEOF
 This file contains any messages produced by compilers while
 running configure, to aid debugging if configure makes a mistake.

-It was created by xgboost $as_me 1.7.5, which was
+It was created by xgboost $as_me 1.7.6, which was
 generated by GNU Autoconf 2.71.  Invocation command line was

  $ $0$ac_configure_args_raw
@@ -3412,7 +3412,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
 # report actual input values of CONFIG_FILES etc. instead of their
 # values after options handling.
 ac_log="
-This file was extended by xgboost $as_me 1.7.5, which was
+This file was extended by xgboost $as_me 1.7.6, which was
 generated by GNU Autoconf 2.71.  Invocation command line was

  CONFIG_FILES    = $CONFIG_FILES
@@ -3467,7 +3467,7 @@ ac_cs_config_escaped=`printf "%s\n" "$ac_cs_config" | sed "s/^ //; s/'/'\\\\\\\\
 cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
 ac_cs_config='$ac_cs_config_escaped'
 ac_cs_version="\\
-xgboost config.status 1.7.5
+xgboost config.status 1.7.6
 configured by $0, generated by GNU Autoconf 2.71,
  with options \\"\$ac_cs_config\\"

--- a/R-package/configure.ac
+++ b/R-package/configure.ac
@@ -2,7 +2,7 @@

 AC_PREREQ(2.69)

-AC_INIT([xgboost],[1.7.5],[],[xgboost],[])
+AC_INIT([xgboost],[1.7.6],[],[xgboost],[])

 : ${R_HOME=`R RHOME`}
 if test -z "${R_HOME}"; then
--- a/doc/c++.rst
+++ b/doc/c++.rst
@@ -8,5 +8,5 @@ As a result it's changing quite often and we don't maintain its stability.  Alon
 plugin system (see ``plugin/example`` in XGBoost's source tree), users can utilize some
 existing c++ headers for gaining more access to the internal of XGBoost.

-* `C++ interface documentation (latest master branch) <https://xgboost.readthedocs.io/en/latest/dev/files.html>`_
+* `C++ interface documentation (latest master branch) <./dev/files.html>`_
 * `C++ interface documentation (last stable release) <https://xgboost.readthedocs.io/en/stable/dev/files.html>`_
--- a/doc/c.rst
+++ b/doc/c.rst
@@ -10,7 +10,7 @@ simply look at function comments in ``include/xgboost/c_api.h``. The reference i
 to sphinx with the help of breathe, which doesn't contain links to examples but might be
 easier to read. For the original doxygen pages please visit:

-* `C API documentation (latest master branch) <https://xgboost.readthedocs.io/en/latest/dev/c__api_8h.html>`_
+* `C API documentation (latest master branch) <./dev/c__api_8h.html>`_
 * `C API documentation (last stable release) <https://xgboost.readthedocs.io/en/stable/dev/c__api_8h.html>`_

 ***************
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -11,54 +11,107 @@
 #
 # All configuration values have a default; values that are commented out
 # serve to show the default.
-from subprocess import call
-from sh.contrib import git
-import urllib.request
-from urllib.error import HTTPError
-import sys
-import re
 import os
+import re
+import shutil
 import subprocess
+import sys
+import tarfile
+import urllib.request
+import warnings
+from urllib.error import HTTPError

-git_branch = os.getenv('SPHINX_GIT_BRANCH', default=None)
+from sh.contrib import git
+
+CURR_PATH = os.path.dirname(os.path.abspath(os.path.expanduser(__file__)))
+PROJECT_ROOT = os.path.normpath(os.path.join(CURR_PATH, os.path.pardir))
+TMP_DIR = os.path.join(CURR_PATH, "tmp")
+DOX_DIR = "doxygen"
+
+
+def run_doxygen():
+    """Build the Doxygen docs with CMake and Ninja, then copy the HTML into TMP_DIR/dev."""
+    curdir = os.path.normpath(os.path.abspath(os.path.curdir))
+    if os.path.exists(TMP_DIR):
+        print(f"Delete directory {TMP_DIR}")
+        shutil.rmtree(TMP_DIR)
+    else:
+        print(f"Create directory {TMP_DIR}")
+        os.mkdir(TMP_DIR)
+    try:
+        os.chdir(PROJECT_ROOT)
+        if not os.path.exists(DOX_DIR):
+            os.mkdir(DOX_DIR)
+        os.chdir(os.path.join(PROJECT_ROOT, DOX_DIR))
+        print(
+            "Build doxygen at {}".format(
+                os.path.join(PROJECT_ROOT, DOX_DIR, "doc_doxygen")
+            )
+        )
+        subprocess.check_call(["cmake", "..", "-DBUILD_C_DOC=ON", "-GNinja"])
+        subprocess.check_call(["ninja", "doc_doxygen"])
+
+        src = os.path.join(PROJECT_ROOT, DOX_DIR, "doc_doxygen", "html")
+        dest = os.path.join(TMP_DIR, "dev")
+        print(f"Copy directory {src} -> {dest}")
+        shutil.copytree(src, dest)
+    except OSError as e:
+        sys.stderr.write("doxygen execution failed: %s" % e)
+    finally:
+        os.chdir(curdir)
+
+
+def is_readthedocs_build():
+    if os.environ.get("READTHEDOCS", None) == "True":
+        return True
+    warnings.warn(
+        "Skipping Doxygen build... You won't have documentation for C/C++ functions. "
+        "Set environment variable READTHEDOCS=True if you want to build Doxygen. "
+        "(If you do opt in, make sure to install Doxygen, Graphviz, CMake, and C++ compiler "
+        "on your system.)"
+    )
+    return False
+
+
+if is_readthedocs_build():
+    run_doxygen()
+
+
+git_branch = os.getenv("SPHINX_GIT_BRANCH", default=None)
 if not git_branch:
    # If SPHINX_GIT_BRANCH environment variable is not given, run git
    # to determine branch name
    git_branch = [
-        re.sub(r'origin/', '', x.lstrip(' ')) for x in str(
-            git.branch('-r', '--contains', 'HEAD')).rstrip('\n').split('\n')
+        re.sub(r"origin/", "", x.lstrip(" "))
+        for x in str(git.branch("-r", "--contains", "HEAD")).rstrip("\n").split("\n")
    ]
-    git_branch = [x for x in git_branch if 'HEAD' not in x]
+    git_branch = [x for x in git_branch if "HEAD" not in x]
 else:
    git_branch = [git_branch]
-print('git_branch = {}'.format(git_branch[0]))
+print("git_branch = {}".format(git_branch[0]))

 try:
    filename, _ = urllib.request.urlretrieve(
-        'https://s3-us-west-2.amazonaws.com/xgboost-docs/{}.tar.bz2'.format(
-            git_branch[0]))
-    call(
-        'if [ -d tmp ]; then rm -rf tmp; fi; mkdir -p tmp/jvm; cd tmp/jvm; tar xvf {}'
-        .format(filename),
-        shell=True)
+        f"https://s3-us-west-2.amazonaws.com/xgboost-docs/{git_branch[0]}.tar.bz2"
+    )
+    if not os.path.exists(TMP_DIR):
+        print(f"Create directory {TMP_DIR}")
+        os.mkdir(TMP_DIR)
+    jvm_doc_dir = os.path.join(TMP_DIR, "jvm")
+    if os.path.exists(jvm_doc_dir):
+        print(f"Delete directory {jvm_doc_dir}")
+        shutil.rmtree(jvm_doc_dir)
+    print(f"Create directory {jvm_doc_dir}")
+    os.mkdir(jvm_doc_dir)
+
+    with tarfile.open(filename, "r:bz2") as t:
+        t.extractall(jvm_doc_dir)
 except HTTPError:
-    print('JVM doc not found. Skipping...')
-try:
-    filename, _ = urllib.request.urlretrieve(
-        'https://s3-us-west-2.amazonaws.com/xgboost-docs/doxygen/{}.tar.bz2'.
-        format(git_branch[0]))
-    call(
-        'mkdir -p tmp/dev; cd tmp/dev; tar xvf {}; mv doc_doxygen/html/* .; rm -rf doc_doxygen'
-        .format(filename),
-        shell=True)
-except HTTPError:
-    print('C API doc not found. Skipping...')
+    print("JVM doc not found. Skipping...")

 # If extensions (or modules to document with autodoc) are in another directory,
 # add these directories to sys.path here. If the directory is relative to the
 # documentation root, use os.path.abspath to make it absolute, like shown here.
-CURR_PATH = os.path.dirname(os.path.abspath(os.path.expanduser(__file__)))
-PROJECT_ROOT = os.path.normpath(os.path.join(CURR_PATH, os.path.pardir))
 libpath = os.path.join(PROJECT_ROOT, "python-package/")
 sys.path.insert(0, libpath)
 sys.path.insert(0, CURR_PATH)
@@ -81,50 +134,56 @@ release = xgboost.__version__
 # Add any Sphinx extension module names here, as strings. They can be
 # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom ones
 extensions = [
-    'matplotlib.sphinxext.plot_directive',
-    'sphinx.ext.autodoc',
-    'sphinx.ext.napoleon',
-    'sphinx.ext.mathjax',
-    'sphinx.ext.intersphinx',
+    "matplotlib.sphinxext.plot_directive",
+    "sphinxcontrib.jquery",
+    "sphinx.ext.autodoc",
+    "sphinx.ext.napoleon",
+    "sphinx.ext.mathjax",
+    "sphinx.ext.intersphinx",
    "sphinx_gallery.gen_gallery",
-    'breathe',
-    'recommonmark'
+    "breathe",
+    "recommonmark",
 ]

 sphinx_gallery_conf = {
    # path to your example scripts
    "examples_dirs": ["../demo/guide-python", "../demo/dask", "../demo/aft_survival"],
    # path to where to save gallery generated output
-    "gallery_dirs": ["python/examples", "python/dask-examples", "python/survival-examples"],
+    "gallery_dirs": [
+        "python/examples",
+        "python/dask-examples",
+        "python/survival-examples",
+    ],
    "matplotlib_animations": True,
 }

 autodoc_typehints = "description"

-graphviz_output_format = 'png'
-plot_formats = [('svg', 300), ('png', 100), ('hires.png', 300)]
+graphviz_output_format = "png"
+plot_formats = [("svg", 300), ("png", 100), ("hires.png", 300)]
 plot_html_show_source_link = False
 plot_html_show_formats = False

 # Breathe extension variables
-DOX_DIR = "doxygen"
-breathe_projects = {
-    "xgboost": os.path.join(PROJECT_ROOT, DOX_DIR, "doc_doxygen/xml")
-}
+breathe_projects = {}
+if is_readthedocs_build():
+    breathe_projects = {
+        "xgboost": os.path.join(PROJECT_ROOT, DOX_DIR, "doc_doxygen/xml")
+    }
 breathe_default_project = "xgboost"

 # Add any paths that contain templates here, relative to this directory.
-templates_path = ['_templates']
+templates_path = ["_templates"]

 # The suffix(es) of source filenames.
 # You can specify multiple suffix as a list of string:
-source_suffix = ['.rst', '.md']
+source_suffix = [".rst", ".md"]

 # The encoding of source files.
 # source_encoding = 'utf-8-sig'

 # The master toctree document.
-master_doc = 'index'
+master_doc = "index"

 # The language for content autogenerated by Sphinx. Refer to documentation
 # for a list of supported languages.
@@ -133,7 +192,7 @@ master_doc = 'index'
 # Usually you set "language" from the command line for these cases.
 language = "en"

-autoclass_content = 'both'
+autoclass_content = "both"

 # There are two options for replacing |today|: either, you set today to some
 # non-false value, then it is used:
@@ -143,8 +202,10 @@ autoclass_content = 'both'

 # List of patterns, relative to source directory, that match files and
 # directories to ignore when looking for source files.
-exclude_patterns = ['_build']
-html_extra_path = ['./tmp']
+exclude_patterns = ["_build"]
+html_extra_path = []
+if is_readthedocs_build():
+    html_extra_path = [TMP_DIR]

 # The reST default role (used for this markup: `text`) to use for all
 # documents.
@@ -162,7 +223,7 @@ html_extra_path = ['./tmp']
 # show_authors = False

 # The name of the Pygments (syntax highlighting) style to use.
-pygments_style = 'sphinx'
+pygments_style = "sphinx"

 # A list of ignored prefixes for module index sorting.
 # modindex_common_prefix = []
@@ -185,27 +246,24 @@ html_logo = "https://raw.githubusercontent.com/dmlc/dmlc.github.io/master/img/lo

 html_css_files = ["css/custom.css"]

-html_sidebars = {
-  '**': ['logo-text.html', 'globaltoc.html', 'searchbox.html']
-}
+html_sidebars = {"**": ["logo-text.html", "globaltoc.html", "searchbox.html"]}

 # Add any paths that contain custom static files (such as style sheets) here,
 # relative to this directory. They are copied after the builtin static files,
 # so a file named "default.css" will overwrite the builtin "default.css".
-html_static_path = ['_static']
+html_static_path = ["_static"]

 # Output file base name for HTML help builder.
-htmlhelp_basename = project + 'doc'
+htmlhelp_basename = project + "doc"

 # -- Options for LaTeX output ---------------------------------------------
-latex_elements = {
-}
+latex_elements = {}

 # Grouping the document tree into LaTeX files. List of tuples
 # (source start file, target name, title,
 #  author, documentclass [howto, manual, or own class]).
 latex_documents = [
-  (master_doc, '%s.tex' % project, project, author, 'manual'),
+    (master_doc, "%s.tex" % project, project, author, "manual"),
 ]

 intersphinx_mapping = {
@@ -220,30 +278,5 @@ intersphinx_mapping = {
 }


-# hook for doxygen
-def run_doxygen():
-    """Run the doxygen make command in the designated folder."""
-    curdir = os.path.normpath(os.path.abspath(os.path.curdir))
-    try:
-        os.chdir(PROJECT_ROOT)
-        if not os.path.exists(DOX_DIR):
-            os.mkdir(DOX_DIR)
-        os.chdir(os.path.join(PROJECT_ROOT, DOX_DIR))
-        subprocess.check_call(["cmake", "..", "-DBUILD_C_DOC=ON", "-GNinja"])
-        subprocess.check_call(["ninja", "doc_doxygen"])
-    except OSError as e:
-        sys.stderr.write("doxygen execution failed: %s" % e)
-    finally:
-        os.chdir(curdir)
-
-
-def generate_doxygen_xml(app):
-    """Run the doxygen make commands if we're on the ReadTheDocs server"""
-    read_the_docs_build = os.environ.get('READTHEDOCS', None) == 'True'
-    if read_the_docs_build:
-        run_doxygen()
-
-
 def setup(app):
-    app.add_css_file('custom.css')
-    app.connect("builder-inited", generate_doxygen_xml)
+    app.add_css_file("custom.css")
--- a/doc/tutorials/spark_estimator.rst
+++ b/doc/tutorials/spark_estimator.rst
@@ -107,8 +107,8 @@ virtualenv and pip:
  python -m venv xgboost_env
  source xgboost_env/bin/activate
  pip install pyarrow pandas venv-pack xgboost
-  # https://rapids.ai/pip.html#install
-  pip install cudf-cu11 --extra-index-url=https://pypi.ngc.nvidia.com
+  # https://docs.rapids.ai/install#pip-install
+  pip install cudf-cu11 --extra-index-url=https://pypi.nvidia.com
  venv-pack -o xgboost_env.tar.gz

 With Conda:
@@ -240,7 +240,7 @@ additional spark configurations and dependencies:
    --master spark://<master-ip>:7077 \
    --conf spark.executor.resource.gpu.amount=1 \
    --conf spark.task.resource.gpu.amount=1 \
-    --packages com.nvidia:rapids-4-spark_2.12:22.08.0 \
+    --packages com.nvidia:rapids-4-spark_2.12:23.04.0 \
    --conf spark.plugins=com.nvidia.spark.SQLPlugin \
    --conf spark.sql.execution.arrow.maxRecordsPerBatch=1000000 \
    --archives xgboost_env.tar.gz#environment \
--- a/include/xgboost/tree_model.h
+++ b/include/xgboost/tree_model.h
@@ -508,7 +508,6 @@ class RegTree : public Model {
      * \brief drop the trace after fill, must be called after fill.
-     * \param inst The sparse instance to drop.
      */
-    void Drop(const SparsePage::Inst& inst);
+    void Drop();
    /*!
     * \brief returns the size of the feature vector
     * \return the size of the feature vector
@@ -709,13 +709,10 @@ inline void RegTree::FVec::Fill(const SparsePage::Inst& inst) {
  has_missing_ = data_.size() != feature_count;
 }

-inline void RegTree::FVec::Drop(const SparsePage::Inst& inst) {
-  for (auto const& entry : inst) {
-    if (entry.index >= data_.size()) {
-      continue;
-    }
-    data_[entry.index].flag = -1;
-  }
+inline void RegTree::FVec::Drop() {
+  Entry e{};
+  e.flag = -1;
+  std::fill_n(data_.data(), data_.size(), e);
  has_missing_ = true;
 }

--- a/include/xgboost/version_config.h
+++ b/include/xgboost/version_config.h
@@ -6,6 +6,6 @@

 #define XGBOOST_VER_MAJOR 1
 #define XGBOOST_VER_MINOR 7
-#define XGBOOST_VER_PATCH 5
+#define XGBOOST_VER_PATCH 6

 #endif  // XGBOOST_VERSION_CONFIG_H_
--- a/jvm-packages/pom.xml
+++ b/jvm-packages/pom.xml
@@ -6,7 +6,7 @@

    <groupId>ml.dmlc</groupId>
    <artifactId>xgboost-jvm_2.12</artifactId>
-    <version>1.7.5</version>
+    <version>1.7.6</version>
    <packaging>pom</packaging>
    <name>XGBoost JVM Package</name>
    <description>JVM Package for XGBoost</description>
--- a/jvm-packages/xgboost4j-example/pom.xml
+++ b/jvm-packages/xgboost4j-example/pom.xml
@@ -6,10 +6,10 @@
    <parent>
        <groupId>ml.dmlc</groupId>
        <artifactId>xgboost-jvm_2.12</artifactId>
-        <version>1.7.5</version>
+        <version>1.7.6</version>
    </parent>
    <artifactId>xgboost4j-example_2.12</artifactId>
-    <version>1.7.5</version>
+    <version>1.7.6</version>
    <packaging>jar</packaging>
    <build>
        <plugins>
@@ -26,7 +26,7 @@
        <dependency>
            <groupId>ml.dmlc</groupId>
            <artifactId>xgboost4j-spark_${scala.binary.version}</artifactId>
-            <version>1.7.5</version>
+            <version>1.7.6</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
@@ -37,7 +37,7 @@
        <dependency>
            <groupId>ml.dmlc</groupId>
            <artifactId>xgboost4j-flink_${scala.binary.version}</artifactId>
-            <version>1.7.5</version>
+            <version>1.7.6</version>
        </dependency>
        <dependency>
            <groupId>org.apache.commons</groupId>
--- a/jvm-packages/xgboost4j-flink/pom.xml
+++ b/jvm-packages/xgboost4j-flink/pom.xml
@@ -6,10 +6,10 @@
    <parent>
        <groupId>ml.dmlc</groupId>
        <artifactId>xgboost-jvm_2.12</artifactId>
-        <version>1.7.5</version>
+        <version>1.7.6</version>
    </parent>
    <artifactId>xgboost4j-flink_2.12</artifactId>
-    <version>1.7.5</version>
+    <version>1.7.6</version>
    <build>
        <plugins>
            <plugin>
@@ -26,7 +26,7 @@
        <dependency>
            <groupId>ml.dmlc</groupId>
            <artifactId>xgboost4j_${scala.binary.version}</artifactId>
-            <version>1.7.5</version>
+            <version>1.7.6</version>
        </dependency>
        <dependency>
            <groupId>org.apache.commons</groupId>
--- a/jvm-packages/xgboost4j-gpu/pom.xml
+++ b/jvm-packages/xgboost4j-gpu/pom.xml
@@ -6,10 +6,10 @@
    <parent>
        <groupId>ml.dmlc</groupId>
        <artifactId>xgboost-jvm_2.12</artifactId>
-        <version>1.7.5</version>
+        <version>1.7.6</version>
    </parent>
    <artifactId>xgboost4j-gpu_2.12</artifactId>
-    <version>1.7.5</version>
+    <version>1.7.6</version>
    <packaging>jar</packaging>

    <dependencies>
--- a/jvm-packages/xgboost4j-spark-gpu/pom.xml
+++ b/jvm-packages/xgboost4j-spark-gpu/pom.xml
@@ -6,7 +6,7 @@
    <parent>
        <groupId>ml.dmlc</groupId>
        <artifactId>xgboost-jvm_2.12</artifactId>
-        <version>1.7.5</version>
+        <version>1.7.6</version>
    </parent>
    <artifactId>xgboost4j-spark-gpu_2.12</artifactId>
    <build>
@@ -24,7 +24,7 @@
        <dependency>
            <groupId>ml.dmlc</groupId>
            <artifactId>xgboost4j-gpu_${scala.binary.version}</artifactId>
-            <version>1.7.5</version>
+            <version>1.7.6</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
--- a/jvm-packages/xgboost4j-spark/pom.xml
+++ b/jvm-packages/xgboost4j-spark/pom.xml
@@ -6,7 +6,7 @@
    <parent>
        <groupId>ml.dmlc</groupId>
        <artifactId>xgboost-jvm_2.12</artifactId>
-        <version>1.7.5</version>
+        <version>1.7.6</version>
    </parent>
    <artifactId>xgboost4j-spark_2.12</artifactId>
    <build>
@@ -24,7 +24,7 @@
        <dependency>
            <groupId>ml.dmlc</groupId>
            <artifactId>xgboost4j_${scala.binary.version}</artifactId>
-            <version>1.7.5</version>
+            <version>1.7.6</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
--- a/jvm-packages/xgboost4j/pom.xml
+++ b/jvm-packages/xgboost4j/pom.xml
@@ -6,10 +6,10 @@
    <parent>
        <groupId>ml.dmlc</groupId>
        <artifactId>xgboost-jvm_2.12</artifactId>
-        <version>1.7.5</version>
+        <version>1.7.6</version>
    </parent>
    <artifactId>xgboost4j_2.12</artifactId>
-    <version>1.7.5</version>
+    <version>1.7.6</version>
    <packaging>jar</packaging>

    <dependencies>
--- a/python-package/xgboost/VERSION
+++ b/python-package/xgboost/VERSION
@@ -1 +1 @@
-1.7.5
+1.7.6
--- a/python-package/xgboost/spark/core.py
+++ b/python-package/xgboost/spark/core.py
@@ -866,7 +866,11 @@ class _SparkXGBEstimator(Estimator, _SparkXGBParams, MLReadable, MLWritable):
        result_xgb_model = self._convert_to_sklearn_model(
            bytearray(booster, "utf-8"), config
        )
-        return self._copyValues(self._create_pyspark_model(result_xgb_model))
+        spark_model = self._create_pyspark_model(result_xgb_model)
+        # By pyspark ML convention, the model uid must be the same as the
+        # estimator uid.
+        spark_model._resetUid(self.uid)
+        return self._copyValues(spark_model)

    def write(self):
        """
--- a/src/data/gradient_index.cc
+++ b/src/data/gradient_index.cc
@@ -149,10 +149,28 @@ common::ColumnMatrix const &GHistIndexMatrix::Transpose() const {
  return *columns_;
 }

+bst_bin_t GHistIndexMatrix::GetGindex(size_t ridx, size_t fidx) const {
+  auto begin = RowIdx(ridx);
+  if (IsDense()) {
+    return static_cast<bst_bin_t>(index[begin + fidx]);
+  }
+  auto end = RowIdx(ridx + 1);
+  auto const& cut_ptrs = cut.Ptrs();
+  auto f_begin = cut_ptrs[fidx];
+  auto f_end = cut_ptrs[fidx + 1];
+  return BinarySearchBin(begin, end, index, f_begin, f_end);
+}
+
 float GHistIndexMatrix::GetFvalue(size_t ridx, size_t fidx, bool is_cat) const {
  auto const &values = cut.Values();
  auto const &mins = cut.MinValues();
  auto const &ptrs = cut.Ptrs();
+  return this->GetFvalue(ptrs, values, mins, ridx, fidx, is_cat);
+}
+
+float GHistIndexMatrix::GetFvalue(std::vector<std::uint32_t> const &ptrs,
+                                  std::vector<float> const &values, std::vector<float> const &mins,
+                                  bst_row_t ridx, bst_feature_t fidx, bool is_cat) const {
  if (is_cat) {
    auto f_begin = ptrs[fidx];
    auto f_end = ptrs[fidx + 1];
@@ -172,24 +190,27 @@ float GHistIndexMatrix::GetFvalue(size_t ridx, size_t fidx, bool is_cat) const {
    }
    return common::HistogramCuts::NumericBinValue(ptrs, values, mins, fidx, bin_idx);
  };
-
-  if (columns_->GetColumnType(fidx) == common::kDenseColumn) {
-    if (columns_->AnyMissing()) {
+  switch (columns_->GetColumnType(fidx)) {
+    case common::kDenseColumn: {
+      if (columns_->AnyMissing()) {
+        return common::DispatchBinType(columns_->GetTypeSize(), [&](auto dtype) {
+          auto column = columns_->DenseColumn<decltype(dtype), true>(fidx);
+          return get_bin_val(column);
+        });
+      } else {
+        return common::DispatchBinType(columns_->GetTypeSize(), [&](auto dtype) {
+          auto column = columns_->DenseColumn<decltype(dtype), false>(fidx);
+          auto bin_idx = column[ridx];
+          return common::HistogramCuts::NumericBinValue(ptrs, values, mins, fidx, bin_idx);
+        });
+      }
+    }
+    case common::kSparseColumn: {
      return common::DispatchBinType(columns_->GetTypeSize(), [&](auto dtype) {
-        auto column = columns_->DenseColumn<decltype(dtype), true>(fidx);
-        return get_bin_val(column);
-      });
-    } else {
-      return common::DispatchBinType(columns_->GetTypeSize(), [&](auto dtype) {
-        auto column = columns_->DenseColumn<decltype(dtype), false>(fidx);
+        auto column = columns_->SparseColumn<decltype(dtype)>(fidx, 0);
        return get_bin_val(column);
      });
    }
-  } else {
-    return common::DispatchBinType(columns_->GetTypeSize(), [&](auto dtype) {
-      auto column = columns_->SparseColumn<decltype(dtype)>(fidx, 0);
-      return get_bin_val(column);
-    });
  }

  SPAN_CHECK(false);
--- a/src/data/gradient_index.h
+++ b/src/data/gradient_index.h
@@ -227,7 +227,12 @@ class GHistIndexMatrix {

  common::ColumnMatrix const& Transpose() const;

+  bst_bin_t GetGindex(size_t ridx, size_t fidx) const;
+
  float GetFvalue(size_t ridx, size_t fidx, bool is_cat) const;
+  float GetFvalue(std::vector<std::uint32_t> const& ptrs, std::vector<float> const& values,
+                  std::vector<float> const& mins, bst_row_t ridx, bst_feature_t fidx,
+                  bool is_cat) const;

 private:
  std::unique_ptr<common::ColumnMatrix> columns_;
--- a/src/predictor/cpu_predictor.cc
+++ b/src/predictor/cpu_predictor.cc
@@ -63,7 +63,7 @@ bst_float PredValue(const SparsePage::Inst &inst,
      psum += (*trees[i])[nidx].LeafValue();
    }
  }
-  p_feats->Drop(inst);
+  p_feats->Drop();
  return psum;
 }

@@ -116,13 +116,11 @@ void FVecFill(const size_t block_size, const size_t batch_offset, const int num_
  }
 }

-template <typename DataView>
-void FVecDrop(const size_t block_size, const size_t batch_offset, DataView* batch,
-              const size_t fvec_offset, std::vector<RegTree::FVec>* p_feats) {
+void FVecDrop(std::size_t const block_size, std::size_t const fvec_offset,
+              std::vector<RegTree::FVec> *p_feats) {
  for (size_t i = 0; i < block_size; ++i) {
    RegTree::FVec &feats = (*p_feats)[fvec_offset + i];
-    const SparsePage::Inst inst = (*batch)[batch_offset + i];
-    feats.Drop(inst);
+    feats.Drop();
  }
 }

@@ -142,11 +140,15 @@ struct SparsePageView {
 struct GHistIndexMatrixView {
 private:
  GHistIndexMatrix const &page_;
-  uint64_t n_features_;
+  std::uint64_t const n_features_;
  common::Span<FeatureType const> ft_;
  common::Span<Entry> workspace_;
  std::vector<size_t> current_unroll_;

+  std::vector<std::uint32_t> const& ptrs_;
+  std::vector<float> const& mins_;
+  std::vector<float> const& values_;
+
 public:
  size_t base_rowid;

@@ -159,6 +161,9 @@ struct GHistIndexMatrixView {
        ft_{ft},
        workspace_{workplace},
        current_unroll_(n_threads > 0 ? n_threads : 1, 0),
+        ptrs_{_page.cut.Ptrs()},
+        mins_{_page.cut.MinValues()},
+        values_{_page.cut.Values()},
        base_rowid{_page.base_rowid} {}

  SparsePage::Inst operator[](size_t r) {
@@ -167,7 +172,7 @@ struct GHistIndexMatrixView {
    size_t non_missing{beg};

    for (bst_feature_t c = 0; c < n_features_; ++c) {
-      float f = page_.GetFvalue(r, c, common::IsCat(ft_, c));
+      float f = page_.GetFvalue(ptrs_, values_, mins_, r, c, common::IsCat(ft_, c));
      if (!common::CheckNAN(f)) {
        workspace_[non_missing] = Entry{c, f};
        ++non_missing;
@@ -250,10 +255,9 @@ void PredictBatchByBlockOfRowsKernel(
    FVecFill(block_size, batch_offset, num_feature, &batch, fvec_offset,
             p_thread_temp);
    // process block of rows through all trees to keep cache locality
-    PredictByAllTrees(model, tree_begin, tree_end, out_preds,
-                      batch_offset + batch.base_rowid, num_group, thread_temp,
-                      fvec_offset, block_size);
-    FVecDrop(block_size, batch_offset, &batch, fvec_offset, p_thread_temp);
+    PredictByAllTrees(model, tree_begin, tree_end, out_preds, batch_offset + batch.base_rowid,
+                      num_group, thread_temp, fvec_offset, block_size);
+    FVecDrop(block_size, fvec_offset, p_thread_temp);
  });
 }

@@ -470,7 +474,7 @@ class CPUPredictor : public Predictor {
          bst_node_t tid = GetLeafIndex<true, true>(tree, feats, cats);
          preds[ridx * ntree_limit + j] = static_cast<bst_float>(tid);
        }
-        feats.Drop(page[i]);
+        feats.Drop();
      });
    }
  }
@@ -544,7 +548,7 @@ class CPUPredictor : public Predictor {
                  (tree_weights == nullptr ? 1 : (*tree_weights)[j]);
            }
          }
-          feats.Drop(page[i]);
+          feats.Drop();
          // add base margin to BIAS
          if (base_margin.Size() != 0) {
            CHECK_EQ(base_margin.Shape(1), ngroup);
--- a/src/tree/hist/evaluate_splits.h
+++ b/src/tree/hist/evaluate_splits.h
@@ -389,6 +389,7 @@ class HistEvaluator {
    tree_evaluator_.AddSplit(candidate.nid, left_child, right_child,
                             tree[candidate.nid].SplitIndex(), left_weight,
                             right_weight);
+    evaluator = tree_evaluator_.GetEvaluator();

    auto max_node = std::max(left_child, tree[candidate.nid].RightChild());
    max_node = std::max(candidate.nid, max_node);
--- a/src/tree/split_evaluator.h
+++ b/src/tree/split_evaluator.h
@@ -48,6 +48,8 @@ class TreeEvaluator {
      monotone_.HostVector().resize(n_features, 0);
      has_constraint_ = false;
    } else {
+      CHECK_LE(p.monotone_constraints.size(), n_features)
+          << "The size of monotone constraint should be less or equal to the number of features.";
      monotone_.HostVector() = p.monotone_constraints;
      monotone_.HostVector().resize(n_features, 0);
      // Initialised to some small size, can grow if needed
--- a/src/tree/updater_gpu_hist.cu
+++ b/src/tree/updater_gpu_hist.cu
@@ -286,7 +286,7 @@ struct GPUHistMakerDevice {
        matrix.feature_segments,
        matrix.gidx_fvalue_map,
        matrix.min_fvalue,
-        matrix.is_dense
+        matrix.is_dense && !collective::IsDistributed()
    };
    auto split = this->evaluator_.EvaluateSingleSplit(inputs, shared_inputs);
    return split;
@@ -300,11 +300,11 @@ struct GPUHistMakerDevice {
    std::vector<bst_node_t> nidx(2 * candidates.size());
    auto h_node_inputs = pinned2.GetSpan<EvaluateSplitInputs>(2 * candidates.size());
    auto matrix = page->GetDeviceAccessor(ctx_->gpu_id);
-    EvaluateSplitSharedInputs shared_inputs{
-        GPUTrainingParam{param}, *quantiser, feature_types,     matrix.feature_segments,
-        matrix.gidx_fvalue_map,  matrix.min_fvalue,
-        matrix.is_dense
-    };
+    EvaluateSplitSharedInputs shared_inputs{GPUTrainingParam{param}, *quantiser, feature_types,
+                                            matrix.feature_segments, matrix.gidx_fvalue_map,
+                                            matrix.min_fvalue,
+                                            // is_dense represents the local data
+                                            matrix.is_dense && !collective::IsDistributed()};
    dh::TemporaryArray<GPUExpandEntry> entries(2 * candidates.size());
    // Store the feature set ptrs so they dont go out of scope before the kernel is called
    std::vector<std::shared_ptr<HostDeviceVector<bst_feature_t>>> feature_sets;
--- a/src/tree/updater_quantile_hist.cc
+++ b/src/tree/updater_quantile_hist.cc
@@ -78,7 +78,7 @@ CPUExpandEntry QuantileHistMaker::Builder::InitRoot(

  {
    GradientPairPrecise grad_stat;
-    if (p_fmat->IsDense()) {
+    if (p_fmat->IsDense() && !collective::IsDistributed()) {
      /**
       * Specialized code for dense data: For dense data (with no missing value), the sum
       * of gradient histogram is equal to snode[nid]
--- a/src/tree/updater_refresh.cc
+++ b/src/tree/updater_refresh.cc
@@ -89,7 +89,7 @@ class TreeRefresher : public TreeUpdater {
                     dmlc::BeginPtr(stemp[tid]) + offset);
            offset += tree->param.num_nodes;
          }
-          feats.Drop(inst);
+          feats.Drop();
        });
      }
      // aggregate the statistics
--- a/tests/ci_build/conda_env/aarch64_test.yml
+++ b/tests/ci_build/conda_env/aarch64_test.yml
@@ -31,6 +31,5 @@ dependencies:
 - pyspark
 - cloudpickle
 - pip:
-  - shap
  - awscli
  - auditwheel
--- a/tests/ci_build/conda_env/cpu_test.yml
+++ b/tests/ci_build/conda_env/cpu_test.yml
@@ -34,7 +34,6 @@ dependencies:
 - pyarrow
 - protobuf
 - cloudpickle
-- shap
 - modin
 # TODO: Replace it with pyspark>=3.4 once 3.4 released.
 # - https://ml-team-public-read.s3.us-west-2.amazonaws.com/pyspark-3.4.0.dev0.tar.gz
--- a/tests/cpp/tree/test_constraints.cc
+++ b/tests/cpp/tree/test_constraints.cc
@@ -6,6 +6,9 @@
 #include <string>

 #include "../../../src/tree/constraints.h"
+#include "../../../src/tree/hist/evaluate_splits.h"
+#include "../../../src/tree/hist/expand_entry.h"
+#include "../helpers.h"

 namespace xgboost {
 namespace tree {
@@ -56,5 +59,38 @@ TEST(CPUFeatureInteractionConstraint, Basic) {
  ASSERT_FALSE(constraints.Query(1, 5));
 }

+TEST(CPUMonoConstraint, Basic) {
+  std::size_t kRows{64}, kCols{16};
+  Context ctx;
+
+  TrainParam param;
+  std::vector<std::int32_t> mono(kCols, 1);
+  I32Array arr;
+  for (std::size_t i = 0; i < kCols; ++i) {
+    arr.GetArray().push_back(mono[i]);
+  }
+  Json jarr{std::move(arr)};
+  std::string str_mono;
+  Json::Dump(jarr, &str_mono);
+  str_mono.front() = '(';
+  str_mono.back() = ')';
+
+  param.UpdateAllowUnknown(Args{{"monotone_constraints", str_mono}});
+
+  auto Xy = RandomDataGenerator{kRows, kCols, 0.0}.GenerateDMatrix(true);
+  auto sampler = std::make_shared<common::ColumnSampler>();
+
+  HistEvaluator<CPUExpandEntry> evalutor{param, Xy->Info(), ctx.Threads(), sampler};
+  evalutor.InitRoot(GradStats{2.0, 2.0});
+
+  SplitEntry split;
+  split.Update(1.0f, 0, 3.0, false, false, GradStats{1.0, 1.0}, GradStats{1.0, 1.0});
+  CPUExpandEntry entry{0, 0, split};
+  RegTree tree;
+  tree.param.UpdateAllowUnknown(Args{{"num_feature", std::to_string(kCols)}});
+  evalutor.ApplyTreeSplit(entry, &tree);
+
+  ASSERT_TRUE(evalutor.Evaluator().has_constraint);
+}
 }  // namespace tree
 }  // namespace xgboost
--- a/tests/python/test_basic_models.py
+++ b/tests/python/test_basic_models.py
@@ -578,7 +578,7 @@ class TestModels:
        y = rng.randn(rows)
        feature_names = ["test_feature_" + str(i) for i in range(cols)]
        X_pd = pd.DataFrame(X, columns=feature_names)
-        X_pd.iloc[:, 3] = X_pd.iloc[:, 3].astype(np.int32)
+        X_pd[f"test_feature_{3}"] = X_pd.iloc[:, 3].astype(np.int32)

        Xy = xgb.DMatrix(X_pd, y)
        assert Xy.feature_types[3] == "int"
--- a/tests/python/test_with_pandas.py
+++ b/tests/python/test_with_pandas.py
@@ -75,7 +75,10 @@ class TestPandas:
        np.testing.assert_array_equal(result, exp)
        dm = xgb.DMatrix(dummies)
        assert dm.feature_names == ['B', 'A_X', 'A_Y', 'A_Z']
-        assert dm.feature_types == ['int', 'int', 'int', 'int']
+        if int(pd.__version__[0]) >= 2:
+            assert dm.feature_types == ['int', 'i', 'i', 'i']
+        else:
+            assert dm.feature_types == ['int', 'int', 'int', 'int']
        assert dm.num_row() == 3
        assert dm.num_col() == 4
Author	SHA1	Message	Date
Jiaming Yuan	36eb41c960	Bump version to 1.7.6 (#9305 )	2023-06-16 03:33:16 +08:00
Jiaming Yuan	39ddf40a8d	[backport] Optimize prediction with QuantileDMatrix. (#9096 ) (#9303 )	2023-06-15 23:32:03 +08:00
Jiaming Yuan	573f1c7db4	[backport] Fix monotone constraints on CPU. (#9122 ) (#9287 ) * [backport] Fix monotone constraints on CPU. (#9122)	2023-06-11 17:51:25 +08:00
Jiaming Yuan	abc80d2a6d	[backport] Improve doxygen (#8959 ) (#9284 ) * Remove Sphinx build from GH Action * Build Doxygen as part of RTD build * Add jQuery Co-authored-by: Philip Hyunsu Cho <chohyu01@cs.washington.edu>	2023-06-11 13:22:23 +08:00
Jiaming Yuan	e882fb3262	[backport] [spark] Make spark model have the same UID with its estimator (#9022 ) (#9285 ) Signed-off-by: Weichen Xu <weichen.xu@databricks.com> Co-authored-by: WeichenXu <weichen.xu@databricks.com>	2023-06-11 13:18:23 +08:00
Jiaming Yuan	3218f6cd3c	[backport] Disable dense opt for distributed training. (#9272 ) (#9288 )	2023-06-11 11:08:45 +08:00
Jiaming Yuan	a962611de7	Disable SHAP test on 1.7 (#9290 )	2023-06-11 02:13:36 +08:00
Jiaming Yuan	14476e8868	[backport] Fix tests with pandas 2.0. (#9014 ) (#9289 ) * Fix tests with pandas 2.0. - `is_categorical` is replaced by `is_categorical_dtype`. - one hot encoding returns boolean type instead of integer type.	2023-06-11 00:52:44 +08:00
Jiaming Yuan	03f3879b71	[backport] [doc] fix the cudf installation [skip ci] (#9106 ) (#9286 ) Co-authored-by: Bobby Wang <wbo4958@gmail.com>	2023-06-10 04:09:27 +08:00
@@ -1 +1 @@
-1.7.5
+1.7.6