Compare commits

..

9 Commits

Author SHA1 Message Date
Jiaming Yuan
36eb41c960 Bump version to 1.7.6 (#9305) 2023-06-16 03:33:16 +08:00
Jiaming Yuan
39ddf40a8d [backport] Optimize prediction with QuantileDMatrix. (#9096) (#9303) 2023-06-15 23:32:03 +08:00
Jiaming Yuan
573f1c7db4 [backport] Fix monotone constraints on CPU. (#9122) (#9287)
* [backport] Fix monotone constraints on CPU. (#9122)
2023-06-11 17:51:25 +08:00
Jiaming Yuan
abc80d2a6d [backport] Improve doxygen (#8959) (#9284)
* Remove Sphinx build from GH Action

* Build Doxygen as part of RTD build

* Add jQuery

Co-authored-by: Philip Hyunsu Cho <chohyu01@cs.washington.edu>
2023-06-11 13:22:23 +08:00
Jiaming Yuan
e882fb3262 [backport] [spark] Make spark model have the same UID with its estimator (#9022) (#9285)
Signed-off-by: Weichen Xu <weichen.xu@databricks.com>
Co-authored-by: WeichenXu <weichen.xu@databricks.com>
2023-06-11 13:18:23 +08:00
Jiaming Yuan
3218f6cd3c [backport] Disable dense opt for distributed training. (#9272) (#9288) 2023-06-11 11:08:45 +08:00
Jiaming Yuan
a962611de7 Disable SHAP test on 1.7 (#9290) 2023-06-11 02:13:36 +08:00
Jiaming Yuan
14476e8868 [backport] Fix tests with pandas 2.0. (#9014) (#9289)
* Fix tests with pandas 2.0.

- `is_categorical` is replaced by `is_categorical_dtype`.
- one hot encoding returns boolean type instead of integer type.
2023-06-11 00:52:44 +08:00
Jiaming Yuan
03f3879b71 [backport] [doc] fix the cudf installation [skip ci] (#9106) (#9286)
Co-authored-by: Bobby Wang <wbo4958@gmail.com>
2023-06-10 04:09:27 +08:00
33 changed files with 284 additions and 233 deletions

View File

@@ -148,66 +148,13 @@ jobs:
run: | run: |
LINT_LANG=cpp make lint LINT_LANG=cpp make lint
doxygen: python3 dmlc-core/scripts/lint.py --exclude_path \
runs-on: ubuntu-latest python-package/xgboost/dmlc-core \
name: Generate C/C++ API doc using Doxygen python-package/xgboost/include \
steps: python-package/xgboost/lib \
- uses: actions/checkout@v2 python-package/xgboost/rabit \
with: python-package/xgboost/src \
submodules: 'true' --pylint-rc python-package/.pylintrc \
- uses: actions/setup-python@v2 xgboost \
with: cpp \
python-version: "3.8" include src python-package
architecture: 'x64'
- name: Install system packages
run: |
sudo apt-get install -y --no-install-recommends doxygen graphviz ninja-build
python -m pip install wheel setuptools
python -m pip install awscli
- name: Run Doxygen
run: |
mkdir build
cd build
cmake .. -DBUILD_C_DOC=ON -GNinja
ninja -v doc_doxygen
- name: Extract branch name
shell: bash
run: echo "##[set-output name=branch;]$(echo ${GITHUB_REF#refs/heads/})"
id: extract_branch
if: github.ref == 'refs/heads/master' || contains(github.ref, 'refs/heads/release_')
- name: Publish
run: |
cd build/
tar cvjf ${{ steps.extract_branch.outputs.branch }}.tar.bz2 doc_doxygen/
python -m awscli s3 cp ./${{ steps.extract_branch.outputs.branch }}.tar.bz2 s3://xgboost-docs/doxygen/ --acl public-read
if: github.ref == 'refs/heads/master' || contains(github.ref, 'refs/heads/release_')
env:
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID_IAM_S3_UPLOADER }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY_IAM_S3_UPLOADER }}
sphinx:
runs-on: ubuntu-latest
name: Build docs using Sphinx
steps:
- uses: actions/checkout@v2
with:
submodules: 'true'
- uses: actions/setup-python@v2
with:
python-version: "3.8"
architecture: 'x64'
- name: Install system packages
run: |
sudo apt-get install -y --no-install-recommends graphviz
python -m pip install wheel setuptools
python -m pip install -r doc/requirements.txt
- name: Extract branch name
shell: bash
run: echo "##[set-output name=branch;]$(echo ${GITHUB_REF#refs/heads/})"
id: extract_branch
if: github.ref == 'refs/heads/master' || contains(github.ref, 'refs/heads/release_')
- name: Run Sphinx
run: |
make -C doc html
env:
SPHINX_GIT_BRANCH: ${{ steps.extract_branch.outputs.branch }}

View File

@@ -1,5 +1,5 @@
cmake_minimum_required(VERSION 3.18 FATAL_ERROR) cmake_minimum_required(VERSION 3.18 FATAL_ERROR)
project(xgboost LANGUAGES CXX C VERSION 1.7.5) project(xgboost LANGUAGES CXX C VERSION 1.7.6)
include(cmake/Utils.cmake) include(cmake/Utils.cmake)
list(APPEND CMAKE_MODULE_PATH "${xgboost_SOURCE_DIR}/cmake/modules") list(APPEND CMAKE_MODULE_PATH "${xgboost_SOURCE_DIR}/cmake/modules")
cmake_policy(SET CMP0022 NEW) cmake_policy(SET CMP0022 NEW)

View File

@@ -1,8 +1,8 @@
Package: xgboost Package: xgboost
Type: Package Type: Package
Title: Extreme Gradient Boosting Title: Extreme Gradient Boosting
Version: 1.7.5.1 Version: 1.7.6.1
Date: 2023-03-29 Date: 2023-06-16
Authors@R: c( Authors@R: c(
person("Tianqi", "Chen", role = c("aut"), person("Tianqi", "Chen", role = c("aut"),
email = "tianqi.tchen@gmail.com"), email = "tianqi.tchen@gmail.com"),

18
R-package/configure vendored
View File

@@ -1,6 +1,6 @@
#! /bin/sh #! /bin/sh
# Guess values for system-dependent variables and create Makefiles. # Guess values for system-dependent variables and create Makefiles.
# Generated by GNU Autoconf 2.71 for xgboost 1.7.5. # Generated by GNU Autoconf 2.71 for xgboost 1.7.6.
# #
# #
# Copyright (C) 1992-1996, 1998-2017, 2020-2021 Free Software Foundation, # Copyright (C) 1992-1996, 1998-2017, 2020-2021 Free Software Foundation,
@@ -607,8 +607,8 @@ MAKEFLAGS=
# Identity of this package. # Identity of this package.
PACKAGE_NAME='xgboost' PACKAGE_NAME='xgboost'
PACKAGE_TARNAME='xgboost' PACKAGE_TARNAME='xgboost'
PACKAGE_VERSION='1.7.5' PACKAGE_VERSION='1.7.6'
PACKAGE_STRING='xgboost 1.7.5' PACKAGE_STRING='xgboost 1.7.6'
PACKAGE_BUGREPORT='' PACKAGE_BUGREPORT=''
PACKAGE_URL='' PACKAGE_URL=''
@@ -1225,7 +1225,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing. # Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh. # This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF cat <<_ACEOF
\`configure' configures xgboost 1.7.5 to adapt to many kinds of systems. \`configure' configures xgboost 1.7.6 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]... Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1287,7 +1287,7 @@ fi
if test -n "$ac_init_help"; then if test -n "$ac_init_help"; then
case $ac_init_help in case $ac_init_help in
short | recursive ) echo "Configuration of xgboost 1.7.5:";; short | recursive ) echo "Configuration of xgboost 1.7.6:";;
esac esac
cat <<\_ACEOF cat <<\_ACEOF
@@ -1367,7 +1367,7 @@ fi
test -n "$ac_init_help" && exit $ac_status test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then if $ac_init_version; then
cat <<\_ACEOF cat <<\_ACEOF
xgboost configure 1.7.5 xgboost configure 1.7.6
generated by GNU Autoconf 2.71 generated by GNU Autoconf 2.71
Copyright (C) 2021 Free Software Foundation, Inc. Copyright (C) 2021 Free Software Foundation, Inc.
@@ -1533,7 +1533,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake. running configure, to aid debugging if configure makes a mistake.
It was created by xgboost $as_me 1.7.5, which was It was created by xgboost $as_me 1.7.6, which was
generated by GNU Autoconf 2.71. Invocation command line was generated by GNU Autoconf 2.71. Invocation command line was
$ $0$ac_configure_args_raw $ $0$ac_configure_args_raw
@@ -3412,7 +3412,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their # report actual input values of CONFIG_FILES etc. instead of their
# values after options handling. # values after options handling.
ac_log=" ac_log="
This file was extended by xgboost $as_me 1.7.5, which was This file was extended by xgboost $as_me 1.7.6, which was
generated by GNU Autoconf 2.71. Invocation command line was generated by GNU Autoconf 2.71. Invocation command line was
CONFIG_FILES = $CONFIG_FILES CONFIG_FILES = $CONFIG_FILES
@@ -3467,7 +3467,7 @@ ac_cs_config_escaped=`printf "%s\n" "$ac_cs_config" | sed "s/^ //; s/'/'\\\\\\\\
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config='$ac_cs_config_escaped' ac_cs_config='$ac_cs_config_escaped'
ac_cs_version="\\ ac_cs_version="\\
xgboost config.status 1.7.5 xgboost config.status 1.7.6
configured by $0, generated by GNU Autoconf 2.71, configured by $0, generated by GNU Autoconf 2.71,
with options \\"\$ac_cs_config\\" with options \\"\$ac_cs_config\\"

View File

@@ -2,7 +2,7 @@
AC_PREREQ(2.69) AC_PREREQ(2.69)
AC_INIT([xgboost],[1.7.5],[],[xgboost],[]) AC_INIT([xgboost],[1.7.6],[],[xgboost],[])
: ${R_HOME=`R RHOME`} : ${R_HOME=`R RHOME`}
if test -z "${R_HOME}"; then if test -z "${R_HOME}"; then

View File

@@ -8,5 +8,5 @@ As a result it's changing quite often and we don't maintain its stability. Alon
plugin system (see ``plugin/example`` in XGBoost's source tree), users can utilize some plugin system (see ``plugin/example`` in XGBoost's source tree), users can utilize some
existing c++ headers for gaining more access to the internal of XGBoost. existing c++ headers for gaining more access to the internal of XGBoost.
* `C++ interface documentation (latest master branch) <https://xgboost.readthedocs.io/en/latest/dev/files.html>`_ * `C++ interface documentation (latest master branch) <./dev/files.html>`_
* `C++ interface documentation (last stable release) <https://xgboost.readthedocs.io/en/stable/dev/files.html>`_ * `C++ interface documentation (last stable release) <https://xgboost.readthedocs.io/en/stable/dev/files.html>`_

View File

@@ -10,7 +10,7 @@ simply look at function comments in ``include/xgboost/c_api.h``. The reference i
to sphinx with the help of breathe, which doesn't contain links to examples but might be to sphinx with the help of breathe, which doesn't contain links to examples but might be
easier to read. For the original doxygen pages please visit: easier to read. For the original doxygen pages please visit:
* `C API documentation (latest master branch) <https://xgboost.readthedocs.io/en/latest/dev/c__api_8h.html>`_ * `C API documentation (latest master branch) <./dev/c__api_8h.html>`_
* `C API documentation (last stable release) <https://xgboost.readthedocs.io/en/stable/dev/c__api_8h.html>`_ * `C API documentation (last stable release) <https://xgboost.readthedocs.io/en/stable/dev/c__api_8h.html>`_
*************** ***************

View File

@@ -11,54 +11,107 @@
# #
# All configuration values have a default; values that are commented out # All configuration values have a default; values that are commented out
# serve to show the default. # serve to show the default.
from subprocess import call
from sh.contrib import git
import urllib.request
from urllib.error import HTTPError
import sys
import re
import os import os
import re
import shutil
import subprocess import subprocess
import sys
import tarfile
import urllib.request
import warnings
from urllib.error import HTTPError
git_branch = os.getenv('SPHINX_GIT_BRANCH', default=None) from sh.contrib import git
CURR_PATH = os.path.dirname(os.path.abspath(os.path.expanduser(__file__)))
PROJECT_ROOT = os.path.normpath(os.path.join(CURR_PATH, os.path.pardir))
TMP_DIR = os.path.join(CURR_PATH, "tmp")
DOX_DIR = "doxygen"
def run_doxygen():
"""Run the doxygen make command in the designated folder."""
curdir = os.path.normpath(os.path.abspath(os.path.curdir))
if os.path.exists(TMP_DIR):
print(f"Delete directory {TMP_DIR}")
shutil.rmtree(TMP_DIR)
else:
print(f"Create directory {TMP_DIR}")
os.mkdir(TMP_DIR)
try:
os.chdir(PROJECT_ROOT)
if not os.path.exists(DOX_DIR):
os.mkdir(DOX_DIR)
os.chdir(os.path.join(PROJECT_ROOT, DOX_DIR))
print(
"Build doxygen at {}".format(
os.path.join(PROJECT_ROOT, DOX_DIR, "doc_doxygen")
)
)
subprocess.check_call(["cmake", "..", "-DBUILD_C_DOC=ON", "-GNinja"])
subprocess.check_call(["ninja", "doc_doxygen"])
src = os.path.join(PROJECT_ROOT, DOX_DIR, "doc_doxygen", "html")
dest = os.path.join(TMP_DIR, "dev")
print(f"Copy directory {src} -> {dest}")
shutil.copytree(src, dest)
except OSError as e:
sys.stderr.write("doxygen execution failed: %s" % e)
finally:
os.chdir(curdir)
def is_readthedocs_build():
if os.environ.get("READTHEDOCS", None) == "True":
return True
warnings.warn(
"Skipping Doxygen build... You won't have documentation for C/C++ functions. "
"Set environment variable READTHEDOCS=True if you want to build Doxygen. "
"(If you do opt in, make sure to install Doxygen, Graphviz, CMake, and C++ compiler "
"on your system.)"
)
return False
if is_readthedocs_build():
run_doxygen()
git_branch = os.getenv("SPHINX_GIT_BRANCH", default=None)
if not git_branch: if not git_branch:
# If SPHINX_GIT_BRANCH environment variable is not given, run git # If SPHINX_GIT_BRANCH environment variable is not given, run git
# to determine branch name # to determine branch name
git_branch = [ git_branch = [
re.sub(r'origin/', '', x.lstrip(' ')) for x in str( re.sub(r"origin/", "", x.lstrip(" "))
git.branch('-r', '--contains', 'HEAD')).rstrip('\n').split('\n') for x in str(git.branch("-r", "--contains", "HEAD")).rstrip("\n").split("\n")
] ]
git_branch = [x for x in git_branch if 'HEAD' not in x] git_branch = [x for x in git_branch if "HEAD" not in x]
else: else:
git_branch = [git_branch] git_branch = [git_branch]
print('git_branch = {}'.format(git_branch[0])) print("git_branch = {}".format(git_branch[0]))
try: try:
filename, _ = urllib.request.urlretrieve( filename, _ = urllib.request.urlretrieve(
'https://s3-us-west-2.amazonaws.com/xgboost-docs/{}.tar.bz2'.format( f"https://s3-us-west-2.amazonaws.com/xgboost-docs/{git_branch[0]}.tar.bz2"
git_branch[0])) )
call( if not os.path.exists(TMP_DIR):
'if [ -d tmp ]; then rm -rf tmp; fi; mkdir -p tmp/jvm; cd tmp/jvm; tar xvf {}' print(f"Create directory {TMP_DIR}")
.format(filename), os.mkdir(TMP_DIR)
shell=True) jvm_doc_dir = os.path.join(TMP_DIR, "jvm")
if os.path.exists(jvm_doc_dir):
print(f"Delete directory {jvm_doc_dir}")
shutil.rmtree(jvm_doc_dir)
print(f"Create directory {jvm_doc_dir}")
os.mkdir(jvm_doc_dir)
with tarfile.open(filename, "r:bz2") as t:
t.extractall(jvm_doc_dir)
except HTTPError: except HTTPError:
print('JVM doc not found. Skipping...') print("JVM doc not found. Skipping...")
try:
filename, _ = urllib.request.urlretrieve(
'https://s3-us-west-2.amazonaws.com/xgboost-docs/doxygen/{}.tar.bz2'.
format(git_branch[0]))
call(
'mkdir -p tmp/dev; cd tmp/dev; tar xvf {}; mv doc_doxygen/html/* .; rm -rf doc_doxygen'
.format(filename),
shell=True)
except HTTPError:
print('C API doc not found. Skipping...')
# If extensions (or modules to document with autodoc) are in another directory, # If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the # add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here. # documentation root, use os.path.abspath to make it absolute, like shown here.
CURR_PATH = os.path.dirname(os.path.abspath(os.path.expanduser(__file__)))
PROJECT_ROOT = os.path.normpath(os.path.join(CURR_PATH, os.path.pardir))
libpath = os.path.join(PROJECT_ROOT, "python-package/") libpath = os.path.join(PROJECT_ROOT, "python-package/")
sys.path.insert(0, libpath) sys.path.insert(0, libpath)
sys.path.insert(0, CURR_PATH) sys.path.insert(0, CURR_PATH)
@@ -81,50 +134,56 @@ release = xgboost.__version__
# Add any Sphinx extension module names here, as strings. They can be # Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom ones # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom ones
extensions = [ extensions = [
'matplotlib.sphinxext.plot_directive', "matplotlib.sphinxext.plot_directive",
'sphinx.ext.autodoc', "sphinxcontrib.jquery",
'sphinx.ext.napoleon', "sphinx.ext.autodoc",
'sphinx.ext.mathjax', "sphinx.ext.napoleon",
'sphinx.ext.intersphinx', "sphinx.ext.mathjax",
"sphinx.ext.intersphinx",
"sphinx_gallery.gen_gallery", "sphinx_gallery.gen_gallery",
'breathe', "breathe",
'recommonmark' "recommonmark",
] ]
sphinx_gallery_conf = { sphinx_gallery_conf = {
# path to your example scripts # path to your example scripts
"examples_dirs": ["../demo/guide-python", "../demo/dask", "../demo/aft_survival"], "examples_dirs": ["../demo/guide-python", "../demo/dask", "../demo/aft_survival"],
# path to where to save gallery generated output # path to where to save gallery generated output
"gallery_dirs": ["python/examples", "python/dask-examples", "python/survival-examples"], "gallery_dirs": [
"python/examples",
"python/dask-examples",
"python/survival-examples",
],
"matplotlib_animations": True, "matplotlib_animations": True,
} }
autodoc_typehints = "description" autodoc_typehints = "description"
graphviz_output_format = 'png' graphviz_output_format = "png"
plot_formats = [('svg', 300), ('png', 100), ('hires.png', 300)] plot_formats = [("svg", 300), ("png", 100), ("hires.png", 300)]
plot_html_show_source_link = False plot_html_show_source_link = False
plot_html_show_formats = False plot_html_show_formats = False
# Breathe extension variables # Breathe extension variables
DOX_DIR = "doxygen" breathe_projects = {}
breathe_projects = { if is_readthedocs_build():
"xgboost": os.path.join(PROJECT_ROOT, DOX_DIR, "doc_doxygen/xml") breathe_projects = {
} "xgboost": os.path.join(PROJECT_ROOT, DOX_DIR, "doc_doxygen/xml")
}
breathe_default_project = "xgboost" breathe_default_project = "xgboost"
# Add any paths that contain templates here, relative to this directory. # Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates'] templates_path = ["_templates"]
# The suffix(es) of source filenames. # The suffix(es) of source filenames.
# You can specify multiple suffix as a list of string: # You can specify multiple suffix as a list of string:
source_suffix = ['.rst', '.md'] source_suffix = [".rst", ".md"]
# The encoding of source files. # The encoding of source files.
# source_encoding = 'utf-8-sig' # source_encoding = 'utf-8-sig'
# The master toctree document. # The master toctree document.
master_doc = 'index' master_doc = "index"
# The language for content autogenerated by Sphinx. Refer to documentation # The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages. # for a list of supported languages.
@@ -133,7 +192,7 @@ master_doc = 'index'
# Usually you set "language" from the command line for these cases. # Usually you set "language" from the command line for these cases.
language = "en" language = "en"
autoclass_content = 'both' autoclass_content = "both"
# There are two options for replacing |today|: either, you set today to some # There are two options for replacing |today|: either, you set today to some
# non-false value, then it is used: # non-false value, then it is used:
@@ -143,8 +202,10 @@ autoclass_content = 'both'
# List of patterns, relative to source directory, that match files and # List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files. # directories to ignore when looking for source files.
exclude_patterns = ['_build'] exclude_patterns = ["_build"]
html_extra_path = ['./tmp'] html_extra_path = []
if is_readthedocs_build():
html_extra_path = [TMP_DIR]
# The reST default role (used for this markup: `text`) to use for all # The reST default role (used for this markup: `text`) to use for all
# documents. # documents.
@@ -162,7 +223,7 @@ html_extra_path = ['./tmp']
# show_authors = False # show_authors = False
# The name of the Pygments (syntax highlighting) style to use. # The name of the Pygments (syntax highlighting) style to use.
pygments_style = 'sphinx' pygments_style = "sphinx"
# A list of ignored prefixes for module index sorting. # A list of ignored prefixes for module index sorting.
# modindex_common_prefix = [] # modindex_common_prefix = []
@@ -185,27 +246,24 @@ html_logo = "https://raw.githubusercontent.com/dmlc/dmlc.github.io/master/img/lo
html_css_files = ["css/custom.css"] html_css_files = ["css/custom.css"]
html_sidebars = { html_sidebars = {"**": ["logo-text.html", "globaltoc.html", "searchbox.html"]}
'**': ['logo-text.html', 'globaltoc.html', 'searchbox.html']
}
# Add any paths that contain custom static files (such as style sheets) here, # Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files, # relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css". # so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static'] html_static_path = ["_static"]
# Output file base name for HTML help builder. # Output file base name for HTML help builder.
htmlhelp_basename = project + 'doc' htmlhelp_basename = project + "doc"
# -- Options for LaTeX output --------------------------------------------- # -- Options for LaTeX output ---------------------------------------------
latex_elements = { latex_elements = {}
}
# Grouping the document tree into LaTeX files. List of tuples # Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title, # (source start file, target name, title,
# author, documentclass [howto, manual, or own class]). # author, documentclass [howto, manual, or own class]).
latex_documents = [ latex_documents = [
(master_doc, '%s.tex' % project, project, author, 'manual'), (master_doc, "%s.tex" % project, project, author, "manual"),
] ]
intersphinx_mapping = { intersphinx_mapping = {
@@ -220,30 +278,5 @@ intersphinx_mapping = {
} }
# hook for doxygen
def run_doxygen():
"""Run the doxygen make command in the designated folder."""
curdir = os.path.normpath(os.path.abspath(os.path.curdir))
try:
os.chdir(PROJECT_ROOT)
if not os.path.exists(DOX_DIR):
os.mkdir(DOX_DIR)
os.chdir(os.path.join(PROJECT_ROOT, DOX_DIR))
subprocess.check_call(["cmake", "..", "-DBUILD_C_DOC=ON", "-GNinja"])
subprocess.check_call(["ninja", "doc_doxygen"])
except OSError as e:
sys.stderr.write("doxygen execution failed: %s" % e)
finally:
os.chdir(curdir)
def generate_doxygen_xml(app):
"""Run the doxygen make commands if we're on the ReadTheDocs server"""
read_the_docs_build = os.environ.get('READTHEDOCS', None) == 'True'
if read_the_docs_build:
run_doxygen()
def setup(app): def setup(app):
app.add_css_file('custom.css') app.add_css_file("custom.css")
app.connect("builder-inited", generate_doxygen_xml)

View File

@@ -107,8 +107,8 @@ virtualenv and pip:
python -m venv xgboost_env python -m venv xgboost_env
source xgboost_env/bin/activate source xgboost_env/bin/activate
pip install pyarrow pandas venv-pack xgboost pip install pyarrow pandas venv-pack xgboost
# https://rapids.ai/pip.html#install # https://docs.rapids.ai/install#pip-install
pip install cudf-cu11 --extra-index-url=https://pypi.ngc.nvidia.com pip install cudf-cu11 --extra-index-url=https://pypi.nvidia.com
venv-pack -o xgboost_env.tar.gz venv-pack -o xgboost_env.tar.gz
With Conda: With Conda:
@@ -240,7 +240,7 @@ additional spark configurations and dependencies:
--master spark://<master-ip>:7077 \ --master spark://<master-ip>:7077 \
--conf spark.executor.resource.gpu.amount=1 \ --conf spark.executor.resource.gpu.amount=1 \
--conf spark.task.resource.gpu.amount=1 \ --conf spark.task.resource.gpu.amount=1 \
--packages com.nvidia:rapids-4-spark_2.12:22.08.0 \ --packages com.nvidia:rapids-4-spark_2.12:23.04.0 \
--conf spark.plugins=com.nvidia.spark.SQLPlugin \ --conf spark.plugins=com.nvidia.spark.SQLPlugin \
--conf spark.sql.execution.arrow.maxRecordsPerBatch=1000000 \ --conf spark.sql.execution.arrow.maxRecordsPerBatch=1000000 \
--archives xgboost_env.tar.gz#environment \ --archives xgboost_env.tar.gz#environment \

View File

@@ -508,7 +508,7 @@ class RegTree : public Model {
* \brief drop the trace after fill, must be called after fill. * \brief drop the trace after fill, must be called after fill.
* \param inst The sparse instance to drop. * \param inst The sparse instance to drop.
*/ */
void Drop(const SparsePage::Inst& inst); void Drop();
/*! /*!
* \brief returns the size of the feature vector * \brief returns the size of the feature vector
* \return the size of the feature vector * \return the size of the feature vector
@@ -709,13 +709,10 @@ inline void RegTree::FVec::Fill(const SparsePage::Inst& inst) {
has_missing_ = data_.size() != feature_count; has_missing_ = data_.size() != feature_count;
} }
inline void RegTree::FVec::Drop(const SparsePage::Inst& inst) { inline void RegTree::FVec::Drop() {
for (auto const& entry : inst) { Entry e{};
if (entry.index >= data_.size()) { e.flag = -1;
continue; std::fill_n(data_.data(), data_.size(), e);
}
data_[entry.index].flag = -1;
}
has_missing_ = true; has_missing_ = true;
} }

View File

@@ -6,6 +6,6 @@
#define XGBOOST_VER_MAJOR 1 #define XGBOOST_VER_MAJOR 1
#define XGBOOST_VER_MINOR 7 #define XGBOOST_VER_MINOR 7
#define XGBOOST_VER_PATCH 5 #define XGBOOST_VER_PATCH 6
#endif // XGBOOST_VERSION_CONFIG_H_ #endif // XGBOOST_VERSION_CONFIG_H_

View File

@@ -6,7 +6,7 @@
<groupId>ml.dmlc</groupId> <groupId>ml.dmlc</groupId>
<artifactId>xgboost-jvm_2.12</artifactId> <artifactId>xgboost-jvm_2.12</artifactId>
<version>1.7.5</version> <version>1.7.6</version>
<packaging>pom</packaging> <packaging>pom</packaging>
<name>XGBoost JVM Package</name> <name>XGBoost JVM Package</name>
<description>JVM Package for XGBoost</description> <description>JVM Package for XGBoost</description>

View File

@@ -6,10 +6,10 @@
<parent> <parent>
<groupId>ml.dmlc</groupId> <groupId>ml.dmlc</groupId>
<artifactId>xgboost-jvm_2.12</artifactId> <artifactId>xgboost-jvm_2.12</artifactId>
<version>1.7.5</version> <version>1.7.6</version>
</parent> </parent>
<artifactId>xgboost4j-example_2.12</artifactId> <artifactId>xgboost4j-example_2.12</artifactId>
<version>1.7.5</version> <version>1.7.6</version>
<packaging>jar</packaging> <packaging>jar</packaging>
<build> <build>
<plugins> <plugins>
@@ -26,7 +26,7 @@
<dependency> <dependency>
<groupId>ml.dmlc</groupId> <groupId>ml.dmlc</groupId>
<artifactId>xgboost4j-spark_${scala.binary.version}</artifactId> <artifactId>xgboost4j-spark_${scala.binary.version}</artifactId>
<version>1.7.5</version> <version>1.7.6</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>org.apache.spark</groupId> <groupId>org.apache.spark</groupId>
@@ -37,7 +37,7 @@
<dependency> <dependency>
<groupId>ml.dmlc</groupId> <groupId>ml.dmlc</groupId>
<artifactId>xgboost4j-flink_${scala.binary.version}</artifactId> <artifactId>xgboost4j-flink_${scala.binary.version}</artifactId>
<version>1.7.5</version> <version>1.7.6</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>org.apache.commons</groupId> <groupId>org.apache.commons</groupId>

View File

@@ -6,10 +6,10 @@
<parent> <parent>
<groupId>ml.dmlc</groupId> <groupId>ml.dmlc</groupId>
<artifactId>xgboost-jvm_2.12</artifactId> <artifactId>xgboost-jvm_2.12</artifactId>
<version>1.7.5</version> <version>1.7.6</version>
</parent> </parent>
<artifactId>xgboost4j-flink_2.12</artifactId> <artifactId>xgboost4j-flink_2.12</artifactId>
<version>1.7.5</version> <version>1.7.6</version>
<build> <build>
<plugins> <plugins>
<plugin> <plugin>
@@ -26,7 +26,7 @@
<dependency> <dependency>
<groupId>ml.dmlc</groupId> <groupId>ml.dmlc</groupId>
<artifactId>xgboost4j_${scala.binary.version}</artifactId> <artifactId>xgboost4j_${scala.binary.version}</artifactId>
<version>1.7.5</version> <version>1.7.6</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>org.apache.commons</groupId> <groupId>org.apache.commons</groupId>

View File

@@ -6,10 +6,10 @@
<parent> <parent>
<groupId>ml.dmlc</groupId> <groupId>ml.dmlc</groupId>
<artifactId>xgboost-jvm_2.12</artifactId> <artifactId>xgboost-jvm_2.12</artifactId>
<version>1.7.5</version> <version>1.7.6</version>
</parent> </parent>
<artifactId>xgboost4j-gpu_2.12</artifactId> <artifactId>xgboost4j-gpu_2.12</artifactId>
<version>1.7.5</version> <version>1.7.6</version>
<packaging>jar</packaging> <packaging>jar</packaging>
<dependencies> <dependencies>

View File

@@ -6,7 +6,7 @@
<parent> <parent>
<groupId>ml.dmlc</groupId> <groupId>ml.dmlc</groupId>
<artifactId>xgboost-jvm_2.12</artifactId> <artifactId>xgboost-jvm_2.12</artifactId>
<version>1.7.5</version> <version>1.7.6</version>
</parent> </parent>
<artifactId>xgboost4j-spark-gpu_2.12</artifactId> <artifactId>xgboost4j-spark-gpu_2.12</artifactId>
<build> <build>
@@ -24,7 +24,7 @@
<dependency> <dependency>
<groupId>ml.dmlc</groupId> <groupId>ml.dmlc</groupId>
<artifactId>xgboost4j-gpu_${scala.binary.version}</artifactId> <artifactId>xgboost4j-gpu_${scala.binary.version}</artifactId>
<version>1.7.5</version> <version>1.7.6</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>org.apache.spark</groupId> <groupId>org.apache.spark</groupId>

View File

@@ -6,7 +6,7 @@
<parent> <parent>
<groupId>ml.dmlc</groupId> <groupId>ml.dmlc</groupId>
<artifactId>xgboost-jvm_2.12</artifactId> <artifactId>xgboost-jvm_2.12</artifactId>
<version>1.7.5</version> <version>1.7.6</version>
</parent> </parent>
<artifactId>xgboost4j-spark_2.12</artifactId> <artifactId>xgboost4j-spark_2.12</artifactId>
<build> <build>
@@ -24,7 +24,7 @@
<dependency> <dependency>
<groupId>ml.dmlc</groupId> <groupId>ml.dmlc</groupId>
<artifactId>xgboost4j_${scala.binary.version}</artifactId> <artifactId>xgboost4j_${scala.binary.version}</artifactId>
<version>1.7.5</version> <version>1.7.6</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>org.apache.spark</groupId> <groupId>org.apache.spark</groupId>

View File

@@ -6,10 +6,10 @@
<parent> <parent>
<groupId>ml.dmlc</groupId> <groupId>ml.dmlc</groupId>
<artifactId>xgboost-jvm_2.12</artifactId> <artifactId>xgboost-jvm_2.12</artifactId>
<version>1.7.5</version> <version>1.7.6</version>
</parent> </parent>
<artifactId>xgboost4j_2.12</artifactId> <artifactId>xgboost4j_2.12</artifactId>
<version>1.7.5</version> <version>1.7.6</version>
<packaging>jar</packaging> <packaging>jar</packaging>
<dependencies> <dependencies>

View File

@@ -1 +1 @@
1.7.5 1.7.6

View File

@@ -866,7 +866,11 @@ class _SparkXGBEstimator(Estimator, _SparkXGBParams, MLReadable, MLWritable):
result_xgb_model = self._convert_to_sklearn_model( result_xgb_model = self._convert_to_sklearn_model(
bytearray(booster, "utf-8"), config bytearray(booster, "utf-8"), config
) )
return self._copyValues(self._create_pyspark_model(result_xgb_model)) spark_model = self._create_pyspark_model(result_xgb_model)
# According to pyspark ML convention, the model uid should be the same
# with estimator uid.
spark_model._resetUid(self.uid)
return self._copyValues(spark_model)
def write(self): def write(self):
""" """

View File

@@ -149,10 +149,28 @@ common::ColumnMatrix const &GHistIndexMatrix::Transpose() const {
return *columns_; return *columns_;
} }
bst_bin_t GHistIndexMatrix::GetGindex(size_t ridx, size_t fidx) const {
auto begin = RowIdx(ridx);
if (IsDense()) {
return static_cast<bst_bin_t>(index[begin + fidx]);
}
auto end = RowIdx(ridx + 1);
auto const& cut_ptrs = cut.Ptrs();
auto f_begin = cut_ptrs[fidx];
auto f_end = cut_ptrs[fidx + 1];
return BinarySearchBin(begin, end, index, f_begin, f_end);
}
float GHistIndexMatrix::GetFvalue(size_t ridx, size_t fidx, bool is_cat) const { float GHistIndexMatrix::GetFvalue(size_t ridx, size_t fidx, bool is_cat) const {
auto const &values = cut.Values(); auto const &values = cut.Values();
auto const &mins = cut.MinValues(); auto const &mins = cut.MinValues();
auto const &ptrs = cut.Ptrs(); auto const &ptrs = cut.Ptrs();
return this->GetFvalue(ptrs, values, mins, ridx, fidx, is_cat);
}
float GHistIndexMatrix::GetFvalue(std::vector<std::uint32_t> const &ptrs,
std::vector<float> const &values, std::vector<float> const &mins,
bst_row_t ridx, bst_feature_t fidx, bool is_cat) const {
if (is_cat) { if (is_cat) {
auto f_begin = ptrs[fidx]; auto f_begin = ptrs[fidx];
auto f_end = ptrs[fidx + 1]; auto f_end = ptrs[fidx + 1];
@@ -172,24 +190,27 @@ float GHistIndexMatrix::GetFvalue(size_t ridx, size_t fidx, bool is_cat) const {
} }
return common::HistogramCuts::NumericBinValue(ptrs, values, mins, fidx, bin_idx); return common::HistogramCuts::NumericBinValue(ptrs, values, mins, fidx, bin_idx);
}; };
switch (columns_->GetColumnType(fidx)) {
if (columns_->GetColumnType(fidx) == common::kDenseColumn) { case common::kDenseColumn: {
if (columns_->AnyMissing()) { if (columns_->AnyMissing()) {
return common::DispatchBinType(columns_->GetTypeSize(), [&](auto dtype) {
auto column = columns_->DenseColumn<decltype(dtype), true>(fidx);
return get_bin_val(column);
});
} else {
return common::DispatchBinType(columns_->GetTypeSize(), [&](auto dtype) {
auto column = columns_->DenseColumn<decltype(dtype), false>(fidx);
auto bin_idx = column[ridx];
return common::HistogramCuts::NumericBinValue(ptrs, values, mins, fidx, bin_idx);
});
}
}
case common::kSparseColumn: {
return common::DispatchBinType(columns_->GetTypeSize(), [&](auto dtype) { return common::DispatchBinType(columns_->GetTypeSize(), [&](auto dtype) {
auto column = columns_->DenseColumn<decltype(dtype), true>(fidx); auto column = columns_->SparseColumn<decltype(dtype)>(fidx, 0);
return get_bin_val(column);
});
} else {
return common::DispatchBinType(columns_->GetTypeSize(), [&](auto dtype) {
auto column = columns_->DenseColumn<decltype(dtype), false>(fidx);
return get_bin_val(column); return get_bin_val(column);
}); });
} }
} else {
return common::DispatchBinType(columns_->GetTypeSize(), [&](auto dtype) {
auto column = columns_->SparseColumn<decltype(dtype)>(fidx, 0);
return get_bin_val(column);
});
} }
SPAN_CHECK(false); SPAN_CHECK(false);

View File

@@ -227,7 +227,12 @@ class GHistIndexMatrix {
common::ColumnMatrix const& Transpose() const; common::ColumnMatrix const& Transpose() const;
bst_bin_t GetGindex(size_t ridx, size_t fidx) const;
float GetFvalue(size_t ridx, size_t fidx, bool is_cat) const; float GetFvalue(size_t ridx, size_t fidx, bool is_cat) const;
float GetFvalue(std::vector<std::uint32_t> const& ptrs, std::vector<float> const& values,
std::vector<float> const& mins, bst_row_t ridx, bst_feature_t fidx,
bool is_cat) const;
private: private:
std::unique_ptr<common::ColumnMatrix> columns_; std::unique_ptr<common::ColumnMatrix> columns_;

View File

@@ -63,7 +63,7 @@ bst_float PredValue(const SparsePage::Inst &inst,
psum += (*trees[i])[nidx].LeafValue(); psum += (*trees[i])[nidx].LeafValue();
} }
} }
p_feats->Drop(inst); p_feats->Drop();
return psum; return psum;
} }
@@ -116,13 +116,11 @@ void FVecFill(const size_t block_size, const size_t batch_offset, const int num_
} }
} }
template <typename DataView> void FVecDrop(std::size_t const block_size, std::size_t const fvec_offset,
void FVecDrop(const size_t block_size, const size_t batch_offset, DataView* batch, std::vector<RegTree::FVec> *p_feats) {
const size_t fvec_offset, std::vector<RegTree::FVec>* p_feats) {
for (size_t i = 0; i < block_size; ++i) { for (size_t i = 0; i < block_size; ++i) {
RegTree::FVec &feats = (*p_feats)[fvec_offset + i]; RegTree::FVec &feats = (*p_feats)[fvec_offset + i];
const SparsePage::Inst inst = (*batch)[batch_offset + i]; feats.Drop();
feats.Drop(inst);
} }
} }
@@ -142,11 +140,15 @@ struct SparsePageView {
struct GHistIndexMatrixView { struct GHistIndexMatrixView {
private: private:
GHistIndexMatrix const &page_; GHistIndexMatrix const &page_;
uint64_t n_features_; std::uint64_t const n_features_;
common::Span<FeatureType const> ft_; common::Span<FeatureType const> ft_;
common::Span<Entry> workspace_; common::Span<Entry> workspace_;
std::vector<size_t> current_unroll_; std::vector<size_t> current_unroll_;
std::vector<std::uint32_t> const& ptrs_;
std::vector<float> const& mins_;
std::vector<float> const& values_;
public: public:
size_t base_rowid; size_t base_rowid;
@@ -159,6 +161,9 @@ struct GHistIndexMatrixView {
ft_{ft}, ft_{ft},
workspace_{workplace}, workspace_{workplace},
current_unroll_(n_threads > 0 ? n_threads : 1, 0), current_unroll_(n_threads > 0 ? n_threads : 1, 0),
ptrs_{_page.cut.Ptrs()},
mins_{_page.cut.MinValues()},
values_{_page.cut.Values()},
base_rowid{_page.base_rowid} {} base_rowid{_page.base_rowid} {}
SparsePage::Inst operator[](size_t r) { SparsePage::Inst operator[](size_t r) {
@@ -167,7 +172,7 @@ struct GHistIndexMatrixView {
size_t non_missing{beg}; size_t non_missing{beg};
for (bst_feature_t c = 0; c < n_features_; ++c) { for (bst_feature_t c = 0; c < n_features_; ++c) {
float f = page_.GetFvalue(r, c, common::IsCat(ft_, c)); float f = page_.GetFvalue(ptrs_, values_, mins_, r, c, common::IsCat(ft_, c));
if (!common::CheckNAN(f)) { if (!common::CheckNAN(f)) {
workspace_[non_missing] = Entry{c, f}; workspace_[non_missing] = Entry{c, f};
++non_missing; ++non_missing;
@@ -250,10 +255,9 @@ void PredictBatchByBlockOfRowsKernel(
FVecFill(block_size, batch_offset, num_feature, &batch, fvec_offset, FVecFill(block_size, batch_offset, num_feature, &batch, fvec_offset,
p_thread_temp); p_thread_temp);
// process block of rows through all trees to keep cache locality // process block of rows through all trees to keep cache locality
PredictByAllTrees(model, tree_begin, tree_end, out_preds, PredictByAllTrees(model, tree_begin, tree_end, out_preds, batch_offset + batch.base_rowid,
batch_offset + batch.base_rowid, num_group, thread_temp, num_group, thread_temp, fvec_offset, block_size);
fvec_offset, block_size); FVecDrop(block_size, fvec_offset, p_thread_temp);
FVecDrop(block_size, batch_offset, &batch, fvec_offset, p_thread_temp);
}); });
} }
@@ -470,7 +474,7 @@ class CPUPredictor : public Predictor {
bst_node_t tid = GetLeafIndex<true, true>(tree, feats, cats); bst_node_t tid = GetLeafIndex<true, true>(tree, feats, cats);
preds[ridx * ntree_limit + j] = static_cast<bst_float>(tid); preds[ridx * ntree_limit + j] = static_cast<bst_float>(tid);
} }
feats.Drop(page[i]); feats.Drop();
}); });
} }
} }
@@ -544,7 +548,7 @@ class CPUPredictor : public Predictor {
(tree_weights == nullptr ? 1 : (*tree_weights)[j]); (tree_weights == nullptr ? 1 : (*tree_weights)[j]);
} }
} }
feats.Drop(page[i]); feats.Drop();
// add base margin to BIAS // add base margin to BIAS
if (base_margin.Size() != 0) { if (base_margin.Size() != 0) {
CHECK_EQ(base_margin.Shape(1), ngroup); CHECK_EQ(base_margin.Shape(1), ngroup);

View File

@@ -389,6 +389,7 @@ class HistEvaluator {
tree_evaluator_.AddSplit(candidate.nid, left_child, right_child, tree_evaluator_.AddSplit(candidate.nid, left_child, right_child,
tree[candidate.nid].SplitIndex(), left_weight, tree[candidate.nid].SplitIndex(), left_weight,
right_weight); right_weight);
evaluator = tree_evaluator_.GetEvaluator();
auto max_node = std::max(left_child, tree[candidate.nid].RightChild()); auto max_node = std::max(left_child, tree[candidate.nid].RightChild());
max_node = std::max(candidate.nid, max_node); max_node = std::max(candidate.nid, max_node);

View File

@@ -48,6 +48,8 @@ class TreeEvaluator {
monotone_.HostVector().resize(n_features, 0); monotone_.HostVector().resize(n_features, 0);
has_constraint_ = false; has_constraint_ = false;
} else { } else {
CHECK_LE(p.monotone_constraints.size(), n_features)
<< "The size of monotone constraint should be less or equal to the number of features.";
monotone_.HostVector() = p.monotone_constraints; monotone_.HostVector() = p.monotone_constraints;
monotone_.HostVector().resize(n_features, 0); monotone_.HostVector().resize(n_features, 0);
// Initialised to some small size, can grow if needed // Initialised to some small size, can grow if needed

View File

@@ -286,7 +286,7 @@ struct GPUHistMakerDevice {
matrix.feature_segments, matrix.feature_segments,
matrix.gidx_fvalue_map, matrix.gidx_fvalue_map,
matrix.min_fvalue, matrix.min_fvalue,
matrix.is_dense matrix.is_dense && !collective::IsDistributed()
}; };
auto split = this->evaluator_.EvaluateSingleSplit(inputs, shared_inputs); auto split = this->evaluator_.EvaluateSingleSplit(inputs, shared_inputs);
return split; return split;
@@ -300,11 +300,11 @@ struct GPUHistMakerDevice {
std::vector<bst_node_t> nidx(2 * candidates.size()); std::vector<bst_node_t> nidx(2 * candidates.size());
auto h_node_inputs = pinned2.GetSpan<EvaluateSplitInputs>(2 * candidates.size()); auto h_node_inputs = pinned2.GetSpan<EvaluateSplitInputs>(2 * candidates.size());
auto matrix = page->GetDeviceAccessor(ctx_->gpu_id); auto matrix = page->GetDeviceAccessor(ctx_->gpu_id);
EvaluateSplitSharedInputs shared_inputs{ EvaluateSplitSharedInputs shared_inputs{GPUTrainingParam{param}, *quantiser, feature_types,
GPUTrainingParam{param}, *quantiser, feature_types, matrix.feature_segments, matrix.feature_segments, matrix.gidx_fvalue_map,
matrix.gidx_fvalue_map, matrix.min_fvalue, matrix.min_fvalue,
matrix.is_dense // is_dense represents the local data
}; matrix.is_dense && !collective::IsDistributed()};
dh::TemporaryArray<GPUExpandEntry> entries(2 * candidates.size()); dh::TemporaryArray<GPUExpandEntry> entries(2 * candidates.size());
// Store the feature set ptrs so they dont go out of scope before the kernel is called // Store the feature set ptrs so they dont go out of scope before the kernel is called
std::vector<std::shared_ptr<HostDeviceVector<bst_feature_t>>> feature_sets; std::vector<std::shared_ptr<HostDeviceVector<bst_feature_t>>> feature_sets;

View File

@@ -78,7 +78,7 @@ CPUExpandEntry QuantileHistMaker::Builder::InitRoot(
{ {
GradientPairPrecise grad_stat; GradientPairPrecise grad_stat;
if (p_fmat->IsDense()) { if (p_fmat->IsDense() && !collective::IsDistributed()) {
/** /**
* Specialized code for dense data: For dense data (with no missing value), the sum * Specialized code for dense data: For dense data (with no missing value), the sum
* of gradient histogram is equal to snode[nid] * of gradient histogram is equal to snode[nid]

View File

@@ -89,7 +89,7 @@ class TreeRefresher : public TreeUpdater {
dmlc::BeginPtr(stemp[tid]) + offset); dmlc::BeginPtr(stemp[tid]) + offset);
offset += tree->param.num_nodes; offset += tree->param.num_nodes;
} }
feats.Drop(inst); feats.Drop();
}); });
} }
// aggregate the statistics // aggregate the statistics

View File

@@ -31,6 +31,5 @@ dependencies:
- pyspark - pyspark
- cloudpickle - cloudpickle
- pip: - pip:
- shap
- awscli - awscli
- auditwheel - auditwheel

View File

@@ -34,7 +34,6 @@ dependencies:
- pyarrow - pyarrow
- protobuf - protobuf
- cloudpickle - cloudpickle
- shap
- modin - modin
# TODO: Replace it with pyspark>=3.4 once 3.4 released. # TODO: Replace it with pyspark>=3.4 once 3.4 released.
# - https://ml-team-public-read.s3.us-west-2.amazonaws.com/pyspark-3.4.0.dev0.tar.gz # - https://ml-team-public-read.s3.us-west-2.amazonaws.com/pyspark-3.4.0.dev0.tar.gz

View File

@@ -6,6 +6,9 @@
#include <string> #include <string>
#include "../../../src/tree/constraints.h" #include "../../../src/tree/constraints.h"
#include "../../../src/tree/hist/evaluate_splits.h"
#include "../../../src/tree/hist/expand_entry.h"
#include "../helpers.h"
namespace xgboost { namespace xgboost {
namespace tree { namespace tree {
@@ -56,5 +59,38 @@ TEST(CPUFeatureInteractionConstraint, Basic) {
ASSERT_FALSE(constraints.Query(1, 5)); ASSERT_FALSE(constraints.Query(1, 5));
} }
// Checks that the evaluator exposed by HistEvaluator reports `has_constraint` after a
// split has been applied to the tree while monotone constraints are configured.
TEST(CPUMonoConstraint, Basic) {
  std::size_t kRows{64};
  std::size_t kCols{16};
  Context ctx;
  TrainParam param;

  // Build the `monotone_constraints` argument: one entry of 1 per feature, dumped as a
  // JSON array whose first and last characters are then replaced by parentheses.
  std::vector<std::int32_t> mono(kCols, 1);
  I32Array constraints;
  for (auto direction : mono) {
    constraints.GetArray().push_back(direction);
  }
  Json json_constraints{std::move(constraints)};
  std::string str_mono;
  Json::Dump(json_constraints, &str_mono);
  str_mono.front() = '(';
  str_mono.back() = ')';
  param.UpdateAllowUnknown(Args{{"monotone_constraints", str_mono}});

  auto Xy = RandomDataGenerator{kRows, kCols, 0.0}.GenerateDMatrix(true);
  auto sampler = std::make_shared<common::ColumnSampler>();

  HistEvaluator<CPUExpandEntry> evaluator{param, Xy->Info(), ctx.Threads(), sampler};
  evaluator.InitRoot(GradStats{2.0, 2.0});

  // Hand-crafted split candidate for the root node (nid 0, depth 0).
  SplitEntry split;
  split.Update(1.0f, 0, 3.0, false, false, GradStats{1.0, 1.0}, GradStats{1.0, 1.0});
  CPUExpandEntry entry{0, 0, split};

  RegTree tree;
  tree.param.UpdateAllowUnknown(Args{{"num_feature", std::to_string(kCols)}});
  evaluator.ApplyTreeSplit(entry, &tree);

  ASSERT_TRUE(evaluator.Evaluator().has_constraint);
}
} // namespace tree } // namespace tree
} // namespace xgboost } // namespace xgboost

View File

@@ -578,7 +578,7 @@ class TestModels:
y = rng.randn(rows) y = rng.randn(rows)
feature_names = ["test_feature_" + str(i) for i in range(cols)] feature_names = ["test_feature_" + str(i) for i in range(cols)]
X_pd = pd.DataFrame(X, columns=feature_names) X_pd = pd.DataFrame(X, columns=feature_names)
X_pd.iloc[:, 3] = X_pd.iloc[:, 3].astype(np.int32) X_pd[f"test_feature_{3}"] = X_pd.iloc[:, 3].astype(np.int32)
Xy = xgb.DMatrix(X_pd, y) Xy = xgb.DMatrix(X_pd, y)
assert Xy.feature_types[3] == "int" assert Xy.feature_types[3] == "int"

View File

@@ -75,7 +75,10 @@ class TestPandas:
np.testing.assert_array_equal(result, exp) np.testing.assert_array_equal(result, exp)
dm = xgb.DMatrix(dummies) dm = xgb.DMatrix(dummies)
assert dm.feature_names == ['B', 'A_X', 'A_Y', 'A_Z'] assert dm.feature_names == ['B', 'A_X', 'A_Y', 'A_Z']
assert dm.feature_types == ['int', 'int', 'int', 'int'] if int(pd.__version__[0]) >= 2:
assert dm.feature_types == ['int', 'i', 'i', 'i']
else:
assert dm.feature_types == ['int', 'int', 'int', 'int']
assert dm.num_row() == 3 assert dm.num_row() == 3
assert dm.num_col() == 4 assert dm.num_col() == 4