Bump version to 1.7.6 (#9305)

[backport] Optimize prediction with QuantileDMatrix. (#9096) (#9303)
[backport] Fix monotone constraints on CPU. (#9122) (#9287)
2023-06-16 03:33:16 +08:00 · 2023-06-15 23:32:03 +08:00 · 2023-06-11 17:51:25 +08:00 · 2023-06-11 13:22:23 +08:00 · 2023-06-11 13:18:23 +08:00 · 2023-06-11 11:08:45 +08:00
33 changed files with 284 additions and 233 deletions
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -148,66 +148,13 @@ jobs:
      run: |
        LINT_LANG=cpp make lint
-  doxygen:
+        python3 dmlc-core/scripts/lint.py --exclude_path \
-    runs-on: ubuntu-latest
+            python-package/xgboost/dmlc-core \
-    name: Generate C/C++ API doc using Doxygen
+            python-package/xgboost/include \
-    steps:
+            python-package/xgboost/lib \
-    - uses: actions/checkout@v2
+            python-package/xgboost/rabit \
-      with:
+            python-package/xgboost/src \
-        submodules: 'true'
+            --pylint-rc python-package/.pylintrc \
-    - uses: actions/setup-python@v2
+            xgboost \
-      with:
+            cpp \
-        python-version: "3.8"
+            include src python-package
        architecture: 'x64'
    - name: Install system packages
      run: |
        sudo apt-get install -y --no-install-recommends doxygen graphviz ninja-build
        python -m pip install wheel setuptools
        python -m pip install awscli
    - name: Run Doxygen
      run: |
        mkdir build
        cd build
        cmake .. -DBUILD_C_DOC=ON -GNinja
        ninja -v doc_doxygen
    - name: Extract branch name
      shell: bash
      run: echo "##[set-output name=branch;]$(echo ${GITHUB_REF#refs/heads/})"
      id: extract_branch
      if: github.ref == 'refs/heads/master' || contains(github.ref, 'refs/heads/release_')
    - name: Publish
      run: |
        cd build/
        tar cvjf ${{ steps.extract_branch.outputs.branch }}.tar.bz2 doc_doxygen/
        python -m awscli s3 cp ./${{ steps.extract_branch.outputs.branch }}.tar.bz2 s3://xgboost-docs/doxygen/ --acl public-read
      if: github.ref == 'refs/heads/master' || contains(github.ref, 'refs/heads/release_')
      env:
        AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID_IAM_S3_UPLOADER }}
        AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY_IAM_S3_UPLOADER }}
  sphinx:
    runs-on: ubuntu-latest
    name: Build docs using Sphinx
    steps:
    - uses: actions/checkout@v2
      with:
        submodules: 'true'
    - uses: actions/setup-python@v2
      with:
        python-version: "3.8"
        architecture: 'x64'
    - name: Install system packages
      run: |
        sudo apt-get install -y --no-install-recommends graphviz
        python -m pip install wheel setuptools
        python -m pip install -r doc/requirements.txt
    - name: Extract branch name
      shell: bash
      run: echo "##[set-output name=branch;]$(echo ${GITHUB_REF#refs/heads/})"
      id: extract_branch
      if: github.ref == 'refs/heads/master' || contains(github.ref, 'refs/heads/release_')
    - name: Run Sphinx
      run: |
        make -C doc html
      env:
        SPHINX_GIT_BRANCH: ${{ steps.extract_branch.outputs.branch }}
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,5 +1,5 @@
 cmake_minimum_required(VERSION 3.18 FATAL_ERROR)
-project(xgboost LANGUAGES CXX C VERSION 1.7.5)
+project(xgboost LANGUAGES CXX C VERSION 1.7.6)
 include(cmake/Utils.cmake)
 list(APPEND CMAKE_MODULE_PATH "${xgboost_SOURCE_DIR}/cmake/modules")
 cmake_policy(SET CMP0022 NEW)
--- a/R-package/DESCRIPTION
+++ b/R-package/DESCRIPTION
@@ -1,8 +1,8 @@
 Package: xgboost
 Type: Package
 Title: Extreme Gradient Boosting
-Version: 1.7.5.1
+Version: 1.7.6.1
-Date: 2023-03-29
+Date: 2023-06-16
 Authors@R: c(
  person("Tianqi", "Chen", role = c("aut"),
         email = "tianqi.tchen@gmail.com"),
--- a/R-package/configure
+++ b/R-package/configure
@@ -1,6 +1,6 @@
 #! /bin/sh
 # Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.71 for xgboost 1.7.5.
+# Generated by GNU Autoconf 2.71 for xgboost 1.7.6.
 #
 #
 # Copyright (C) 1992-1996, 1998-2017, 2020-2021 Free Software Foundation,
@@ -607,8 +607,8 @@ MAKEFLAGS=
 # Identity of this package.
 PACKAGE_NAME='xgboost'
 PACKAGE_TARNAME='xgboost'
-PACKAGE_VERSION='1.7.5'
+PACKAGE_VERSION='1.7.6'
-PACKAGE_STRING='xgboost 1.7.5'
+PACKAGE_STRING='xgboost 1.7.6'
 PACKAGE_BUGREPORT=''
 PACKAGE_URL=''
@@ -1225,7 +1225,7 @@ if test "$ac_init_help" = "long"; then
  # Omit some internal or obsolete options to make the list less imposing.
  # This message is too long to be a string in the A/UX 3.1 sh.
  cat <<_ACEOF
-\`configure' configures xgboost 1.7.5 to adapt to many kinds of systems.
+\`configure' configures xgboost 1.7.6 to adapt to many kinds of systems.
 Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1287,7 +1287,7 @@ fi
 if test -n "$ac_init_help"; then
  case $ac_init_help in
-     short | recursive ) echo "Configuration of xgboost 1.7.5:";;
+     short | recursive ) echo "Configuration of xgboost 1.7.6:";;
   esac
  cat <<\_ACEOF
@@ -1367,7 +1367,7 @@ fi
 test -n "$ac_init_help" && exit $ac_status
 if $ac_init_version; then
  cat <<\_ACEOF
-xgboost configure 1.7.5
+xgboost configure 1.7.6
 generated by GNU Autoconf 2.71
 Copyright (C) 2021 Free Software Foundation, Inc.
@@ -1533,7 +1533,7 @@ cat >config.log <<_ACEOF
 This file contains any messages produced by compilers while
 running configure, to aid debugging if configure makes a mistake.
-It was created by xgboost $as_me 1.7.5, which was
+It was created by xgboost $as_me 1.7.6, which was
 generated by GNU Autoconf 2.71.  Invocation command line was
  $ $0$ac_configure_args_raw
@@ -3412,7 +3412,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
 # report actual input values of CONFIG_FILES etc. instead of their
 # values after options handling.
 ac_log="
-This file was extended by xgboost $as_me 1.7.5, which was
+This file was extended by xgboost $as_me 1.7.6, which was
 generated by GNU Autoconf 2.71.  Invocation command line was
  CONFIG_FILES    = $CONFIG_FILES
@@ -3467,7 +3467,7 @@ ac_cs_config_escaped=`printf "%s\n" "$ac_cs_config" | sed "s/^ //; s/'/'\\\\\\\\
 cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
 ac_cs_config='$ac_cs_config_escaped'
 ac_cs_version="\\
-xgboost config.status 1.7.5
+xgboost config.status 1.7.6
 configured by $0, generated by GNU Autoconf 2.71,
  with options \\"\$ac_cs_config\\"
--- a/R-package/configure.ac
+++ b/R-package/configure.ac
@@ -2,7 +2,7 @@
 AC_PREREQ(2.69)
-AC_INIT([xgboost],[1.7.5],[],[xgboost],[])
+AC_INIT([xgboost],[1.7.6],[],[xgboost],[])
 : ${R_HOME=`R RHOME`}
 if test -z "${R_HOME}"; then
--- a/doc/c++.rst
+++ b/doc/c++.rst
@@ -8,5 +8,5 @@ As a result it's changing quite often and we don't maintain its stability.  Alon
 plugin system (see ``plugin/example`` in XGBoost's source tree), users can utilize some
 existing c++ headers for gaining more access to the internal of XGBoost.
-* `C++ interface documentation (latest master branch) <https://xgboost.readthedocs.io/en/latest/dev/files.html>`_
+* `C++ interface documentation (latest master branch) <./dev/files.html>`_
 * `C++ interface documentation (last stable release) <https://xgboost.readthedocs.io/en/stable/dev/files.html>`_
--- a/doc/c.rst
+++ b/doc/c.rst
@@ -10,7 +10,7 @@ simply look at function comments in ``include/xgboost/c_api.h``. The reference i
 to sphinx with the help of breathe, which doesn't contain links to examples but might be
 easier to read. For the original doxygen pages please visit:
-* `C API documentation (latest master branch) <https://xgboost.readthedocs.io/en/latest/dev/c__api_8h.html>`_
+* `C API documentation (latest master branch) <./dev/c__api_8h.html>`_
 * `C API documentation (last stable release) <https://xgboost.readthedocs.io/en/stable/dev/c__api_8h.html>`_
 ***************
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -11,54 +11,107 @@
 #
 # All configuration values have a default; values that are commented out
 # serve to show the default.
 from subprocess import call
 from sh.contrib import git
 import urllib.request
 from urllib.error import HTTPError
 import sys
 import re
 import os
 import re
 import shutil
 import subprocess
 import sys
 import tarfile
 import urllib.request
 import warnings
 from urllib.error import HTTPError
-git_branch = os.getenv('SPHINX_GIT_BRANCH', default=None)
+from sh.contrib import git
 CURR_PATH = os.path.dirname(os.path.abspath(os.path.expanduser(__file__)))
 PROJECT_ROOT = os.path.normpath(os.path.join(CURR_PATH, os.path.pardir))
 TMP_DIR = os.path.join(CURR_PATH, "tmp")
 DOX_DIR = "doxygen"
 def run_doxygen():
    """Run the doxygen make command in the designated folder."""
    curdir = os.path.normpath(os.path.abspath(os.path.curdir))
    if os.path.exists(TMP_DIR):
        print(f"Delete directory {TMP_DIR}")
        shutil.rmtree(TMP_DIR)
    else:
        print(f"Create directory {TMP_DIR}")
        os.mkdir(TMP_DIR)
    try:
        os.chdir(PROJECT_ROOT)
        if not os.path.exists(DOX_DIR):
            os.mkdir(DOX_DIR)
        os.chdir(os.path.join(PROJECT_ROOT, DOX_DIR))
        print(
            "Build doxygen at {}".format(
                os.path.join(PROJECT_ROOT, DOX_DIR, "doc_doxygen")
            )
        )
        subprocess.check_call(["cmake", "..", "-DBUILD_C_DOC=ON", "-GNinja"])
        subprocess.check_call(["ninja", "doc_doxygen"])
        src = os.path.join(PROJECT_ROOT, DOX_DIR, "doc_doxygen", "html")
        dest = os.path.join(TMP_DIR, "dev")
        print(f"Copy directory {src} -> {dest}")
        shutil.copytree(src, dest)
    except OSError as e:
        sys.stderr.write("doxygen execution failed: %s" % e)
    finally:
        os.chdir(curdir)
 def is_readthedocs_build():
    if os.environ.get("READTHEDOCS", None) == "True":
        return True
    warnings.warn(
        "Skipping Doxygen build... You won't have documentation for C/C++ functions. "
        "Set environment variable READTHEDOCS=True if you want to build Doxygen. "
        "(If you do opt in, make sure to install Doxygen, Graphviz, CMake, and C++ compiler "
        "on your system.)"
    )
    return False
 if is_readthedocs_build():
    run_doxygen()
 git_branch = os.getenv("SPHINX_GIT_BRANCH", default=None)
 if not git_branch:
    # If SPHINX_GIT_BRANCH environment variable is not given, run git
    # to determine branch name
    git_branch = [
-        re.sub(r'origin/', '', x.lstrip(' ')) for x in str(
+        re.sub(r"origin/", "", x.lstrip(" "))
-            git.branch('-r', '--contains', 'HEAD')).rstrip('\n').split('\n')
+        for x in str(git.branch("-r", "--contains", "HEAD")).rstrip("\n").split("\n")
    ]
-    git_branch = [x for x in git_branch if 'HEAD' not in x]
+    git_branch = [x for x in git_branch if "HEAD" not in x]
 else:
    git_branch = [git_branch]
-print('git_branch = {}'.format(git_branch[0]))
+print("git_branch = {}".format(git_branch[0]))
 try:
    filename, _ = urllib.request.urlretrieve(
-        'https://s3-us-west-2.amazonaws.com/xgboost-docs/{}.tar.bz2'.format(
+        f"https://s3-us-west-2.amazonaws.com/xgboost-docs/{git_branch[0]}.tar.bz2"
-            git_branch[0]))
+    )
-    call(
+    if not os.path.exists(TMP_DIR):
-        'if [ -d tmp ]; then rm -rf tmp; fi; mkdir -p tmp/jvm; cd tmp/jvm; tar xvf {}'
+        print(f"Create directory {TMP_DIR}")
-        .format(filename),
+        os.mkdir(TMP_DIR)
-        shell=True)
+    jvm_doc_dir = os.path.join(TMP_DIR, "jvm")
    if os.path.exists(jvm_doc_dir):
        print(f"Delete directory {jvm_doc_dir}")
        shutil.rmtree(jvm_doc_dir)
    print(f"Create directory {jvm_doc_dir}")
    os.mkdir(jvm_doc_dir)
    with tarfile.open(filename, "r:bz2") as t:
        t.extractall(jvm_doc_dir)
 except HTTPError:
-    print('JVM doc not found. Skipping...')
+    print("JVM doc not found. Skipping...")
 try:
    filename, _ = urllib.request.urlretrieve(
        'https://s3-us-west-2.amazonaws.com/xgboost-docs/doxygen/{}.tar.bz2'.
        format(git_branch[0]))
    call(
        'mkdir -p tmp/dev; cd tmp/dev; tar xvf {}; mv doc_doxygen/html/* .; rm -rf doc_doxygen'
        .format(filename),
        shell=True)
 except HTTPError:
    print('C API doc not found. Skipping...')
 # If extensions (or modules to document with autodoc) are in another directory,
 # add these directories to sys.path here. If the directory is relative to the
 # documentation root, use os.path.abspath to make it absolute, like shown here.
 CURR_PATH = os.path.dirname(os.path.abspath(os.path.expanduser(__file__)))
 PROJECT_ROOT = os.path.normpath(os.path.join(CURR_PATH, os.path.pardir))
 libpath = os.path.join(PROJECT_ROOT, "python-package/")
 sys.path.insert(0, libpath)
 sys.path.insert(0, CURR_PATH)
@@ -81,50 +134,56 @@ release = xgboost.__version__
 # Add any Sphinx extension module names here, as strings. They can be
 # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom ones
 extensions = [
-    'matplotlib.sphinxext.plot_directive',
+    "matplotlib.sphinxext.plot_directive",
-    'sphinx.ext.autodoc',
+    "sphinxcontrib.jquery",
-    'sphinx.ext.napoleon',
+    "sphinx.ext.autodoc",
-    'sphinx.ext.mathjax',
+    "sphinx.ext.napoleon",
-    'sphinx.ext.intersphinx',
+    "sphinx.ext.mathjax",
    "sphinx.ext.intersphinx",
    "sphinx_gallery.gen_gallery",
-    'breathe',
+    "breathe",
-    'recommonmark'
+    "recommonmark",
 ]
 sphinx_gallery_conf = {
    # path to your example scripts
    "examples_dirs": ["../demo/guide-python", "../demo/dask", "../demo/aft_survival"],
    # path to where to save gallery generated output
-    "gallery_dirs": ["python/examples", "python/dask-examples", "python/survival-examples"],
+    "gallery_dirs": [
        "python/examples",
        "python/dask-examples",
        "python/survival-examples",
    ],
    "matplotlib_animations": True,
 }
 autodoc_typehints = "description"
-graphviz_output_format = 'png'
+graphviz_output_format = "png"
-plot_formats = [('svg', 300), ('png', 100), ('hires.png', 300)]
+plot_formats = [("svg", 300), ("png", 100), ("hires.png", 300)]
 plot_html_show_source_link = False
 plot_html_show_formats = False
 # Breathe extension variables
-DOX_DIR = "doxygen"
+breathe_projects = {}
-breathe_projects = {
+if is_readthedocs_build():
-    "xgboost": os.path.join(PROJECT_ROOT, DOX_DIR, "doc_doxygen/xml")
+    breathe_projects = {
-}
+        "xgboost": os.path.join(PROJECT_ROOT, DOX_DIR, "doc_doxygen/xml")
    }
 breathe_default_project = "xgboost"
 # Add any paths that contain templates here, relative to this directory.
-templates_path = ['_templates']
+templates_path = ["_templates"]
 # The suffix(es) of source filenames.
 # You can specify multiple suffix as a list of string:
-source_suffix = ['.rst', '.md']
+source_suffix = [".rst", ".md"]
 # The encoding of source files.
 # source_encoding = 'utf-8-sig'
 # The master toctree document.
-master_doc = 'index'
+master_doc = "index"
 # The language for content autogenerated by Sphinx. Refer to documentation
 # for a list of supported languages.
@@ -133,7 +192,7 @@ master_doc = 'index'
 # Usually you set "language" from the command line for these cases.
 language = "en"
-autoclass_content = 'both'
+autoclass_content = "both"
 # There are two options for replacing |today|: either, you set today to some
 # non-false value, then it is used:
@@ -143,8 +202,10 @@ autoclass_content = 'both'
 # List of patterns, relative to source directory, that match files and
 # directories to ignore when looking for source files.
-exclude_patterns = ['_build']
+exclude_patterns = ["_build"]
-html_extra_path = ['./tmp']
+html_extra_path = []
 if is_readthedocs_build():
    html_extra_path = [TMP_DIR]
 # The reST default role (used for this markup: `text`) to use for all
 # documents.
@@ -162,7 +223,7 @@ html_extra_path = ['./tmp']
 # show_authors = False
 # The name of the Pygments (syntax highlighting) style to use.
-pygments_style = 'sphinx'
+pygments_style = "sphinx"
 # A list of ignored prefixes for module index sorting.
 # modindex_common_prefix = []
@@ -185,27 +246,24 @@ html_logo = "https://raw.githubusercontent.com/dmlc/dmlc.github.io/master/img/lo
 html_css_files = ["css/custom.css"]
-html_sidebars = {
+html_sidebars = {"**": ["logo-text.html", "globaltoc.html", "searchbox.html"]}
  '**': ['logo-text.html', 'globaltoc.html', 'searchbox.html']
 }
 # Add any paths that contain custom static files (such as style sheets) here,
 # relative to this directory. They are copied after the builtin static files,
 # so a file named "default.css" will overwrite the builtin "default.css".
-html_static_path = ['_static']
+html_static_path = ["_static"]
 # Output file base name for HTML help builder.
-htmlhelp_basename = project + 'doc'
+htmlhelp_basename = project + "doc"
 # -- Options for LaTeX output ---------------------------------------------
-latex_elements = {
+latex_elements = {}
 }
 # Grouping the document tree into LaTeX files. List of tuples
 # (source start file, target name, title,
 #  author, documentclass [howto, manual, or own class]).
 latex_documents = [
-  (master_doc, '%s.tex' % project, project, author, 'manual'),
+    (master_doc, "%s.tex" % project, project, author, "manual"),
 ]
 intersphinx_mapping = {
@@ -220,30 +278,5 @@ intersphinx_mapping = {
 }
 # hook for doxygen
 def run_doxygen():
    """Run the doxygen make command in the designated folder."""
    curdir = os.path.normpath(os.path.abspath(os.path.curdir))
    try:
        os.chdir(PROJECT_ROOT)
        if not os.path.exists(DOX_DIR):
            os.mkdir(DOX_DIR)
        os.chdir(os.path.join(PROJECT_ROOT, DOX_DIR))
        subprocess.check_call(["cmake", "..", "-DBUILD_C_DOC=ON", "-GNinja"])
        subprocess.check_call(["ninja", "doc_doxygen"])
    except OSError as e:
        sys.stderr.write("doxygen execution failed: %s" % e)
    finally:
        os.chdir(curdir)
 def generate_doxygen_xml(app):
    """Run the doxygen make commands if we're on the ReadTheDocs server"""
    read_the_docs_build = os.environ.get('READTHEDOCS', None) == 'True'
    if read_the_docs_build:
        run_doxygen()
 def setup(app):
-    app.add_css_file('custom.css')
+    app.add_css_file("custom.css")
    app.connect("builder-inited", generate_doxygen_xml)
--- a/doc/tutorials/spark_estimator.rst
+++ b/doc/tutorials/spark_estimator.rst
@@ -107,8 +107,8 @@ virtualenv and pip:
  python -m venv xgboost_env
  source xgboost_env/bin/activate
  pip install pyarrow pandas venv-pack xgboost
-  # https://rapids.ai/pip.html#install
+  # https://docs.rapids.ai/install#pip-install
-  pip install cudf-cu11 --extra-index-url=https://pypi.ngc.nvidia.com
+  pip install cudf-cu11 --extra-index-url=https://pypi.nvidia.com
  venv-pack -o xgboost_env.tar.gz
 With Conda:
@@ -240,7 +240,7 @@ additional spark configurations and dependencies:
    --master spark://<master-ip>:7077 \
    --conf spark.executor.resource.gpu.amount=1 \
    --conf spark.task.resource.gpu.amount=1 \
-    --packages com.nvidia:rapids-4-spark_2.12:22.08.0 \
+    --packages com.nvidia:rapids-4-spark_2.12:23.04.0 \
    --conf spark.plugins=com.nvidia.spark.SQLPlugin \
    --conf spark.sql.execution.arrow.maxRecordsPerBatch=1000000 \
    --archives xgboost_env.tar.gz#environment \
--- a/include/xgboost/tree_model.h
+++ b/include/xgboost/tree_model.h
@@ -508,7 +508,7 @@ class RegTree : public Model {
     * \brief drop the trace after fill, must be called after fill.
     * \param inst The sparse instance to drop.
     */
-    void Drop(const SparsePage::Inst& inst);
+    void Drop();
    /*!
     * \brief returns the size of the feature vector
     * \return the size of the feature vector
@@ -709,13 +709,10 @@ inline void RegTree::FVec::Fill(const SparsePage::Inst& inst) {
  has_missing_ = data_.size() != feature_count;
 }
-inline void RegTree::FVec::Drop(const SparsePage::Inst& inst) {
+inline void RegTree::FVec::Drop() {
-  for (auto const& entry : inst) {
+  Entry e{};
-    if (entry.index >= data_.size()) {
+  e.flag = -1;
-      continue;
+  std::fill_n(data_.data(), data_.size(), e);
    }
    data_[entry.index].flag = -1;
  }
  has_missing_ = true;
 }
--- a/include/xgboost/version_config.h
+++ b/include/xgboost/version_config.h
@@ -6,6 +6,6 @@
 #define XGBOOST_VER_MAJOR 1
 #define XGBOOST_VER_MINOR 7
-#define XGBOOST_VER_PATCH 5
+#define XGBOOST_VER_PATCH 6
 #endif  // XGBOOST_VERSION_CONFIG_H_
--- a/jvm-packages/pom.xml
+++ b/jvm-packages/pom.xml
@@ -6,7 +6,7 @@
    <groupId>ml.dmlc</groupId>
    <artifactId>xgboost-jvm_2.12</artifactId>
-    <version>1.7.5</version>
+    <version>1.7.6</version>
    <packaging>pom</packaging>
    <name>XGBoost JVM Package</name>
    <description>JVM Package for XGBoost</description>
--- a/jvm-packages/xgboost4j-example/pom.xml
+++ b/jvm-packages/xgboost4j-example/pom.xml
@@ -6,10 +6,10 @@
    <parent>
        <groupId>ml.dmlc</groupId>
        <artifactId>xgboost-jvm_2.12</artifactId>
-        <version>1.7.5</version>
+        <version>1.7.6</version>
    </parent>
    <artifactId>xgboost4j-example_2.12</artifactId>
-    <version>1.7.5</version>
+    <version>1.7.6</version>
    <packaging>jar</packaging>
    <build>
        <plugins>
@@ -26,7 +26,7 @@
        <dependency>
            <groupId>ml.dmlc</groupId>
            <artifactId>xgboost4j-spark_${scala.binary.version}</artifactId>
-            <version>1.7.5</version>
+            <version>1.7.6</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
@@ -37,7 +37,7 @@
        <dependency>
            <groupId>ml.dmlc</groupId>
            <artifactId>xgboost4j-flink_${scala.binary.version}</artifactId>
-            <version>1.7.5</version>
+            <version>1.7.6</version>
        </dependency>
        <dependency>
            <groupId>org.apache.commons</groupId>
--- a/jvm-packages/xgboost4j-flink/pom.xml
+++ b/jvm-packages/xgboost4j-flink/pom.xml
@@ -6,10 +6,10 @@
    <parent>
        <groupId>ml.dmlc</groupId>
        <artifactId>xgboost-jvm_2.12</artifactId>
-        <version>1.7.5</version>
+        <version>1.7.6</version>
    </parent>
    <artifactId>xgboost4j-flink_2.12</artifactId>
-    <version>1.7.5</version>
+    <version>1.7.6</version>
    <build>
        <plugins>
            <plugin>
@@ -26,7 +26,7 @@
        <dependency>
            <groupId>ml.dmlc</groupId>
            <artifactId>xgboost4j_${scala.binary.version}</artifactId>
-            <version>1.7.5</version>
+            <version>1.7.6</version>
        </dependency>
        <dependency>
            <groupId>org.apache.commons</groupId>
--- a/jvm-packages/xgboost4j-gpu/pom.xml
+++ b/jvm-packages/xgboost4j-gpu/pom.xml
@@ -6,10 +6,10 @@
    <parent>
        <groupId>ml.dmlc</groupId>
        <artifactId>xgboost-jvm_2.12</artifactId>
-        <version>1.7.5</version>
+        <version>1.7.6</version>
    </parent>
    <artifactId>xgboost4j-gpu_2.12</artifactId>
-    <version>1.7.5</version>
+    <version>1.7.6</version>
    <packaging>jar</packaging>
    <dependencies>
--- a/jvm-packages/xgboost4j-spark-gpu/pom.xml
+++ b/jvm-packages/xgboost4j-spark-gpu/pom.xml
@@ -6,7 +6,7 @@
    <parent>
        <groupId>ml.dmlc</groupId>
        <artifactId>xgboost-jvm_2.12</artifactId>
-        <version>1.7.5</version>
+        <version>1.7.6</version>
    </parent>
    <artifactId>xgboost4j-spark-gpu_2.12</artifactId>
    <build>
@@ -24,7 +24,7 @@
        <dependency>
            <groupId>ml.dmlc</groupId>
            <artifactId>xgboost4j-gpu_${scala.binary.version}</artifactId>
-            <version>1.7.5</version>
+            <version>1.7.6</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
--- a/jvm-packages/xgboost4j-spark/pom.xml
+++ b/jvm-packages/xgboost4j-spark/pom.xml
@@ -6,7 +6,7 @@
    <parent>
        <groupId>ml.dmlc</groupId>
        <artifactId>xgboost-jvm_2.12</artifactId>
-        <version>1.7.5</version>
+        <version>1.7.6</version>
    </parent>
    <artifactId>xgboost4j-spark_2.12</artifactId>
    <build>
@@ -24,7 +24,7 @@
        <dependency>
            <groupId>ml.dmlc</groupId>
            <artifactId>xgboost4j_${scala.binary.version}</artifactId>
-            <version>1.7.5</version>
+            <version>1.7.6</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
--- a/jvm-packages/xgboost4j/pom.xml
+++ b/jvm-packages/xgboost4j/pom.xml
@@ -6,10 +6,10 @@
    <parent>
        <groupId>ml.dmlc</groupId>
        <artifactId>xgboost-jvm_2.12</artifactId>
-        <version>1.7.5</version>
+        <version>1.7.6</version>
    </parent>
    <artifactId>xgboost4j_2.12</artifactId>
-    <version>1.7.5</version>
+    <version>1.7.6</version>
    <packaging>jar</packaging>
    <dependencies>
--- a/python-package/xgboost/VERSION
+++ b/python-package/xgboost/VERSION
@@ -1 +1 @@
-1.7.5
+1.7.6
--- a/python-package/xgboost/spark/core.py
+++ b/python-package/xgboost/spark/core.py
@@ -866,7 +866,11 @@ class _SparkXGBEstimator(Estimator, _SparkXGBParams, MLReadable, MLWritable):
        result_xgb_model = self._convert_to_sklearn_model(
            bytearray(booster, "utf-8"), config
        )
-        return self._copyValues(self._create_pyspark_model(result_xgb_model))
+        spark_model = self._create_pyspark_model(result_xgb_model)
        # According to pyspark ML convention, the model uid should be the same
        # with estimator uid.
        spark_model._resetUid(self.uid)
        return self._copyValues(spark_model)
    def write(self):
        """
--- a/src/data/gradient_index.cc
+++ b/src/data/gradient_index.cc
@@ -149,10 +149,28 @@ common::ColumnMatrix const &GHistIndexMatrix::Transpose() const {
  return *columns_;
 }
 bst_bin_t GHistIndexMatrix::GetGindex(size_t ridx, size_t fidx) const {
  auto begin = RowIdx(ridx);
  if (IsDense()) {
    return static_cast<bst_bin_t>(index[begin + fidx]);
  }
  auto end = RowIdx(ridx + 1);
  auto const& cut_ptrs = cut.Ptrs();
  auto f_begin = cut_ptrs[fidx];
  auto f_end = cut_ptrs[fidx + 1];
  return BinarySearchBin(begin, end, index, f_begin, f_end);
 }
 float GHistIndexMatrix::GetFvalue(size_t ridx, size_t fidx, bool is_cat) const {
  auto const &values = cut.Values();
  auto const &mins = cut.MinValues();
  auto const &ptrs = cut.Ptrs();
  return this->GetFvalue(ptrs, values, mins, ridx, fidx, is_cat);
 }
 float GHistIndexMatrix::GetFvalue(std::vector<std::uint32_t> const &ptrs,
                                  std::vector<float> const &values, std::vector<float> const &mins,
                                  bst_row_t ridx, bst_feature_t fidx, bool is_cat) const {
  if (is_cat) {
    auto f_begin = ptrs[fidx];
    auto f_end = ptrs[fidx + 1];
@@ -172,24 +190,27 @@ float GHistIndexMatrix::GetFvalue(size_t ridx, size_t fidx, bool is_cat) const {
    }
    return common::HistogramCuts::NumericBinValue(ptrs, values, mins, fidx, bin_idx);
  };
-
+  switch (columns_->GetColumnType(fidx)) {
-  if (columns_->GetColumnType(fidx) == common::kDenseColumn) {
+    case common::kDenseColumn: {
-    if (columns_->AnyMissing()) {
+      if (columns_->AnyMissing()) {
        return common::DispatchBinType(columns_->GetTypeSize(), [&](auto dtype) {
          auto column = columns_->DenseColumn<decltype(dtype), true>(fidx);
          return get_bin_val(column);
        });
      } else {
        return common::DispatchBinType(columns_->GetTypeSize(), [&](auto dtype) {
          auto column = columns_->DenseColumn<decltype(dtype), false>(fidx);
          auto bin_idx = column[ridx];
          return common::HistogramCuts::NumericBinValue(ptrs, values, mins, fidx, bin_idx);
        });
      }
    }
    case common::kSparseColumn: {
      return common::DispatchBinType(columns_->GetTypeSize(), [&](auto dtype) {
-        auto column = columns_->DenseColumn<decltype(dtype), true>(fidx);
+        auto column = columns_->SparseColumn<decltype(dtype)>(fidx, 0);
        return get_bin_val(column);
      });
    } else {
      return common::DispatchBinType(columns_->GetTypeSize(), [&](auto dtype) {
        auto column = columns_->DenseColumn<decltype(dtype), false>(fidx);
        return get_bin_val(column);
      });
    }
  } else {
    return common::DispatchBinType(columns_->GetTypeSize(), [&](auto dtype) {
      auto column = columns_->SparseColumn<decltype(dtype)>(fidx, 0);
      return get_bin_val(column);
    });
  }
  SPAN_CHECK(false);
--- a/src/data/gradient_index.h
+++ b/src/data/gradient_index.h
@@ -227,7 +227,12 @@ class GHistIndexMatrix {
  common::ColumnMatrix const& Transpose() const;
  bst_bin_t GetGindex(size_t ridx, size_t fidx) const;
  float GetFvalue(size_t ridx, size_t fidx, bool is_cat) const;
  float GetFvalue(std::vector<std::uint32_t> const& ptrs, std::vector<float> const& values,
                  std::vector<float> const& mins, bst_row_t ridx, bst_feature_t fidx,
                  bool is_cat) const;
 private:
  std::unique_ptr<common::ColumnMatrix> columns_;
--- a/src/predictor/cpu_predictor.cc
+++ b/src/predictor/cpu_predictor.cc
@@ -63,7 +63,7 @@ bst_float PredValue(const SparsePage::Inst &inst,
      psum += (*trees[i])[nidx].LeafValue();
    }
  }
-  p_feats->Drop(inst);
+  p_feats->Drop();
  return psum;
 }
@@ -116,13 +116,11 @@ void FVecFill(const size_t block_size, const size_t batch_offset, const int num_
  }
 }
-template <typename DataView>
+void FVecDrop(std::size_t const block_size, std::size_t const fvec_offset,
-void FVecDrop(const size_t block_size, const size_t batch_offset, DataView* batch,
+              std::vector<RegTree::FVec> *p_feats) {
              const size_t fvec_offset, std::vector<RegTree::FVec>* p_feats) {
  for (size_t i = 0; i < block_size; ++i) {
    RegTree::FVec &feats = (*p_feats)[fvec_offset + i];
-    const SparsePage::Inst inst = (*batch)[batch_offset + i];
+    feats.Drop();
    feats.Drop(inst);
  }
 }
@@ -142,11 +140,15 @@ struct SparsePageView {
 struct GHistIndexMatrixView {
 private:
  GHistIndexMatrix const &page_;
-  uint64_t n_features_;
+  std::uint64_t const n_features_;
  common::Span<FeatureType const> ft_;
  common::Span<Entry> workspace_;
  std::vector<size_t> current_unroll_;
  std::vector<std::uint32_t> const& ptrs_;
  std::vector<float> const& mins_;
  std::vector<float> const& values_;
 public:
  size_t base_rowid;
@@ -159,6 +161,9 @@ struct GHistIndexMatrixView {
        ft_{ft},
        workspace_{workplace},
        current_unroll_(n_threads > 0 ? n_threads : 1, 0),
        ptrs_{_page.cut.Ptrs()},
        mins_{_page.cut.MinValues()},
        values_{_page.cut.Values()},
        base_rowid{_page.base_rowid} {}
  SparsePage::Inst operator[](size_t r) {
@@ -167,7 +172,7 @@ struct GHistIndexMatrixView {
    size_t non_missing{beg};
    for (bst_feature_t c = 0; c < n_features_; ++c) {
-      float f = page_.GetFvalue(r, c, common::IsCat(ft_, c));
+      float f = page_.GetFvalue(ptrs_, values_, mins_, r, c, common::IsCat(ft_, c));
      if (!common::CheckNAN(f)) {
        workspace_[non_missing] = Entry{c, f};
        ++non_missing;
@@ -250,10 +255,9 @@ void PredictBatchByBlockOfRowsKernel(
    FVecFill(block_size, batch_offset, num_feature, &batch, fvec_offset,
             p_thread_temp);
    // process block of rows through all trees to keep cache locality
-    PredictByAllTrees(model, tree_begin, tree_end, out_preds,
+    PredictByAllTrees(model, tree_begin, tree_end, out_preds, batch_offset + batch.base_rowid,
-                      batch_offset + batch.base_rowid, num_group, thread_temp,
+                      num_group, thread_temp, fvec_offset, block_size);
-                      fvec_offset, block_size);
+    FVecDrop(block_size, fvec_offset, p_thread_temp);
    FVecDrop(block_size, batch_offset, &batch, fvec_offset, p_thread_temp);
  });
 }
@@ -470,7 +474,7 @@ class CPUPredictor : public Predictor {
          bst_node_t tid = GetLeafIndex<true, true>(tree, feats, cats);
          preds[ridx * ntree_limit + j] = static_cast<bst_float>(tid);
        }
-        feats.Drop(page[i]);
+        feats.Drop();
      });
    }
  }
@@ -544,7 +548,7 @@ class CPUPredictor : public Predictor {
                  (tree_weights == nullptr ? 1 : (*tree_weights)[j]);
            }
          }
-          feats.Drop(page[i]);
+          feats.Drop();
          // add base margin to BIAS
          if (base_margin.Size() != 0) {
            CHECK_EQ(base_margin.Shape(1), ngroup);
--- a/src/tree/hist/evaluate_splits.h
+++ b/src/tree/hist/evaluate_splits.h
@@ -389,6 +389,7 @@ class HistEvaluator {
    tree_evaluator_.AddSplit(candidate.nid, left_child, right_child,
                             tree[candidate.nid].SplitIndex(), left_weight,
                             right_weight);
    evaluator = tree_evaluator_.GetEvaluator();
    auto max_node = std::max(left_child, tree[candidate.nid].RightChild());
    max_node = std::max(candidate.nid, max_node);
--- a/src/tree/split_evaluator.h
+++ b/src/tree/split_evaluator.h
@@ -48,6 +48,8 @@ class TreeEvaluator {
      monotone_.HostVector().resize(n_features, 0);
      has_constraint_ = false;
    } else {
      CHECK_LE(p.monotone_constraints.size(), n_features)
          << "The size of monotone constraint should be less or equal to the number of features.";
      monotone_.HostVector() = p.monotone_constraints;
      monotone_.HostVector().resize(n_features, 0);
      // Initialised to some small size, can grow if needed
--- a/src/tree/updater_gpu_hist.cu
+++ b/src/tree/updater_gpu_hist.cu
@@ -286,7 +286,7 @@ struct GPUHistMakerDevice {
        matrix.feature_segments,
        matrix.gidx_fvalue_map,
        matrix.min_fvalue,
-        matrix.is_dense
+        matrix.is_dense && !collective::IsDistributed()
    };
    auto split = this->evaluator_.EvaluateSingleSplit(inputs, shared_inputs);
    return split;
@@ -300,11 +300,11 @@ struct GPUHistMakerDevice {
    std::vector<bst_node_t> nidx(2 * candidates.size());
    auto h_node_inputs = pinned2.GetSpan<EvaluateSplitInputs>(2 * candidates.size());
    auto matrix = page->GetDeviceAccessor(ctx_->gpu_id);
-    EvaluateSplitSharedInputs shared_inputs{
+    EvaluateSplitSharedInputs shared_inputs{GPUTrainingParam{param}, *quantiser, feature_types,
-        GPUTrainingParam{param}, *quantiser, feature_types,     matrix.feature_segments,
+                                            matrix.feature_segments, matrix.gidx_fvalue_map,
-        matrix.gidx_fvalue_map,  matrix.min_fvalue,
+                                            matrix.min_fvalue,
-        matrix.is_dense
+                                            // is_dense represents the local data
-    };
+                                            matrix.is_dense && !collective::IsDistributed()};
    dh::TemporaryArray<GPUExpandEntry> entries(2 * candidates.size());
    // Store the feature set ptrs so they don't go out of scope before the kernel is called
    std::vector<std::shared_ptr<HostDeviceVector<bst_feature_t>>> feature_sets;
--- a/src/tree/updater_quantile_hist.cc
+++ b/src/tree/updater_quantile_hist.cc
@@ -78,7 +78,7 @@ CPUExpandEntry QuantileHistMaker::Builder::InitRoot(
  {
    GradientPairPrecise grad_stat;
-    if (p_fmat->IsDense()) {
+    if (p_fmat->IsDense() && !collective::IsDistributed()) {
      /**
       * Specialized code for dense data: For dense data (with no missing value), the sum
       * of gradient histogram is equal to snode[nid]
--- a/src/tree/updater_refresh.cc
+++ b/src/tree/updater_refresh.cc
@@ -89,7 +89,7 @@ class TreeRefresher : public TreeUpdater {
                     dmlc::BeginPtr(stemp[tid]) + offset);
            offset += tree->param.num_nodes;
          }
-          feats.Drop(inst);
+          feats.Drop();
        });
      }
      // aggregate the statistics
--- a/tests/ci_build/conda_env/aarch64_test.yml
+++ b/tests/ci_build/conda_env/aarch64_test.yml
@@ -31,6 +31,5 @@ dependencies:
 - pyspark
 - cloudpickle
 - pip:
  - shap
  - awscli
  - auditwheel
--- a/tests/ci_build/conda_env/cpu_test.yml
+++ b/tests/ci_build/conda_env/cpu_test.yml
@@ -34,7 +34,6 @@ dependencies:
 - pyarrow
 - protobuf
 - cloudpickle
 - shap
 - modin
 # TODO: Replace it with pyspark>=3.4 once 3.4 released.
 # - https://ml-team-public-read.s3.us-west-2.amazonaws.com/pyspark-3.4.0.dev0.tar.gz
--- a/tests/cpp/tree/test_constraints.cc
+++ b/tests/cpp/tree/test_constraints.cc
@@ -6,6 +6,9 @@
 #include <string>
 #include "../../../src/tree/constraints.h"
 #include "../../../src/tree/hist/evaluate_splits.h"
 #include "../../../src/tree/hist/expand_entry.h"
 #include "../helpers.h"
 namespace xgboost {
 namespace tree {
@@ -56,5 +59,38 @@ TEST(CPUFeatureInteractionConstraint, Basic) {
  ASSERT_FALSE(constraints.Query(1, 5));
 }
 // Checks that a HistEvaluator built with monotone constraints still reports
 // `has_constraint` on the evaluator it exposes after a split has been applied.
 // NOTE(review): looks like the regression test for the CPU monotone-constraint
 // fix in this change set -- confirm against the ApplyTreeSplit hunk.
 TEST(CPUMonoConstraint, Basic) {
  std::size_t kRows{64}, kCols{16};
  Context ctx;
  TrainParam param;
  // One constraint value of 1 per column.
  std::vector<std::int32_t> mono(kCols, 1);
  I32Array arr;
  for (std::size_t i = 0; i < kCols; ++i) {
    arr.GetArray().push_back(mono[i]);
  }
  Json jarr{std::move(arr)};
  std::string str_mono;
  Json::Dump(jarr, &str_mono);
  // Overwrite the first and last characters of the dumped string with
  // parentheses; presumably the parameter parser expects tuple-style
  // "(1,1,...)" syntax rather than JSON brackets -- TODO confirm.
  str_mono.front() = '(';
  str_mono.back() = ')';
  param.UpdateAllowUnknown(Args{{"monotone_constraints", str_mono}});
  // Random kRows x kCols matrix; only its Info() is handed to the evaluator.
  auto Xy = RandomDataGenerator{kRows, kCols, 0.0}.GenerateDMatrix(true);
  auto sampler = std::make_shared<common::ColumnSampler>();
  HistEvaluator<CPUExpandEntry> evalutor{param, Xy->Info(), ctx.Threads(), sampler};
  evalutor.InitRoot(GradStats{2.0, 2.0});
  // Hand-built split candidate for node 0 whose two child statistics
  // (1.0, 1.0) each add up to the root statistics passed to InitRoot above.
  SplitEntry split;
  split.Update(1.0f, 0, 3.0, false, false, GradStats{1.0, 1.0}, GradStats{1.0, 1.0});
  CPUExpandEntry entry{0, 0, split};
  RegTree tree;
  tree.param.UpdateAllowUnknown(Args{{"num_feature", std::to_string(kCols)}});
  evalutor.ApplyTreeSplit(entry, &tree);
  // The constraint flag must still be set once the split has been applied.
  ASSERT_TRUE(evalutor.Evaluator().has_constraint);
 }
 }  // namespace tree
 }  // namespace xgboost
--- a/tests/python/test_basic_models.py
+++ b/tests/python/test_basic_models.py
@@ -578,7 +578,7 @@ class TestModels:
        y = rng.randn(rows)
        feature_names = ["test_feature_" + str(i) for i in range(cols)]
        X_pd = pd.DataFrame(X, columns=feature_names)
-        X_pd.iloc[:, 3] = X_pd.iloc[:, 3].astype(np.int32)
+        X_pd[f"test_feature_{3}"] = X_pd.iloc[:, 3].astype(np.int32)
        Xy = xgb.DMatrix(X_pd, y)
        assert Xy.feature_types[3] == "int"
--- a/tests/python/test_with_pandas.py
+++ b/tests/python/test_with_pandas.py
@@ -75,7 +75,10 @@ class TestPandas:
        np.testing.assert_array_equal(result, exp)
        dm = xgb.DMatrix(dummies)
        assert dm.feature_names == ['B', 'A_X', 'A_Y', 'A_Z']
-        assert dm.feature_types == ['int', 'int', 'int', 'int']
+        if int(pd.__version__[0]) >= 2:
            assert dm.feature_types == ['int', 'i', 'i', 'i']
        else:
            assert dm.feature_types == ['int', 'int', 'int', 'int']
        assert dm.num_row() == 3
        assert dm.num_col() == 4
Author	SHA1	Message	Date
Jiaming Yuan	36eb41c960	Bump version to 1.7.6 (#9305 )	2023-06-16 03:33:16 +08:00
Jiaming Yuan	39ddf40a8d	[backport] Optimize prediction with QuantileDMatrix. (#9096 ) (#9303 )	2023-06-15 23:32:03 +08:00
Jiaming Yuan	573f1c7db4	[backport] Fix monotone constraints on CPU. (#9122 ) (#9287 ) * [backport] Fix monotone constraints on CPU. (#9122)	2023-06-11 17:51:25 +08:00
Jiaming Yuan	abc80d2a6d	[backport] Improve doxygen (#8959 ) (#9284 ) * Remove Sphinx build from GH Action * Build Doxygen as part of RTD build * Add jQuery Co-authored-by: Philip Hyunsu Cho <chohyu01@cs.washington.edu>	2023-06-11 13:22:23 +08:00
Jiaming Yuan	e882fb3262	[backport] [spark] Make spark model have the same UID with its estimator (#9022 ) (#9285 ) Signed-off-by: Weichen Xu <weichen.xu@databricks.com> Co-authored-by: WeichenXu <weichen.xu@databricks.com>	2023-06-11 13:18:23 +08:00
Jiaming Yuan	3218f6cd3c	[backport] Disable dense opt for distributed training. (#9272 ) (#9288 )	2023-06-11 11:08:45 +08:00
Jiaming Yuan	a962611de7	Disable SHAP test on 1.7 (#9290 )	2023-06-11 02:13:36 +08:00
Jiaming Yuan	14476e8868	[backport] Fix tests with pandas 2.0. (#9014 ) (#9289 ) * Fix tests with pandas 2.0. - `is_categorical` is replaced by `is_categorical_dtype`. - one hot encoding returns boolean type instead of integer type.	2023-06-11 00:52:44 +08:00
Jiaming Yuan	03f3879b71	[backport] [doc] fix the cudf installation [skip ci] (#9106 ) (#9286 ) Co-authored-by: Bobby Wang <wbo4958@gmail.com>	2023-06-10 04:09:27 +08:00