Compare commits
9 Commits
v1.7.5
...
release_1.
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
36eb41c960 | ||
|
|
39ddf40a8d | ||
|
|
573f1c7db4 | ||
|
|
abc80d2a6d | ||
|
|
e882fb3262 | ||
|
|
3218f6cd3c | ||
|
|
a962611de7 | ||
|
|
14476e8868 | ||
|
|
03f3879b71 |
73
.github/workflows/main.yml
vendored
73
.github/workflows/main.yml
vendored
@@ -148,66 +148,13 @@ jobs:
|
|||||||
run: |
|
run: |
|
||||||
LINT_LANG=cpp make lint
|
LINT_LANG=cpp make lint
|
||||||
|
|
||||||
doxygen:
|
python3 dmlc-core/scripts/lint.py --exclude_path \
|
||||||
runs-on: ubuntu-latest
|
python-package/xgboost/dmlc-core \
|
||||||
name: Generate C/C++ API doc using Doxygen
|
python-package/xgboost/include \
|
||||||
steps:
|
python-package/xgboost/lib \
|
||||||
- uses: actions/checkout@v2
|
python-package/xgboost/rabit \
|
||||||
with:
|
python-package/xgboost/src \
|
||||||
submodules: 'true'
|
--pylint-rc python-package/.pylintrc \
|
||||||
- uses: actions/setup-python@v2
|
xgboost \
|
||||||
with:
|
cpp \
|
||||||
python-version: "3.8"
|
include src python-package
|
||||||
architecture: 'x64'
|
|
||||||
- name: Install system packages
|
|
||||||
run: |
|
|
||||||
sudo apt-get install -y --no-install-recommends doxygen graphviz ninja-build
|
|
||||||
python -m pip install wheel setuptools
|
|
||||||
python -m pip install awscli
|
|
||||||
- name: Run Doxygen
|
|
||||||
run: |
|
|
||||||
mkdir build
|
|
||||||
cd build
|
|
||||||
cmake .. -DBUILD_C_DOC=ON -GNinja
|
|
||||||
ninja -v doc_doxygen
|
|
||||||
- name: Extract branch name
|
|
||||||
shell: bash
|
|
||||||
run: echo "##[set-output name=branch;]$(echo ${GITHUB_REF#refs/heads/})"
|
|
||||||
id: extract_branch
|
|
||||||
if: github.ref == 'refs/heads/master' || contains(github.ref, 'refs/heads/release_')
|
|
||||||
- name: Publish
|
|
||||||
run: |
|
|
||||||
cd build/
|
|
||||||
tar cvjf ${{ steps.extract_branch.outputs.branch }}.tar.bz2 doc_doxygen/
|
|
||||||
python -m awscli s3 cp ./${{ steps.extract_branch.outputs.branch }}.tar.bz2 s3://xgboost-docs/doxygen/ --acl public-read
|
|
||||||
if: github.ref == 'refs/heads/master' || contains(github.ref, 'refs/heads/release_')
|
|
||||||
env:
|
|
||||||
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID_IAM_S3_UPLOADER }}
|
|
||||||
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY_IAM_S3_UPLOADER }}
|
|
||||||
|
|
||||||
sphinx:
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
name: Build docs using Sphinx
|
|
||||||
steps:
|
|
||||||
- uses: actions/checkout@v2
|
|
||||||
with:
|
|
||||||
submodules: 'true'
|
|
||||||
- uses: actions/setup-python@v2
|
|
||||||
with:
|
|
||||||
python-version: "3.8"
|
|
||||||
architecture: 'x64'
|
|
||||||
- name: Install system packages
|
|
||||||
run: |
|
|
||||||
sudo apt-get install -y --no-install-recommends graphviz
|
|
||||||
python -m pip install wheel setuptools
|
|
||||||
python -m pip install -r doc/requirements.txt
|
|
||||||
- name: Extract branch name
|
|
||||||
shell: bash
|
|
||||||
run: echo "##[set-output name=branch;]$(echo ${GITHUB_REF#refs/heads/})"
|
|
||||||
id: extract_branch
|
|
||||||
if: github.ref == 'refs/heads/master' || contains(github.ref, 'refs/heads/release_')
|
|
||||||
- name: Run Sphinx
|
|
||||||
run: |
|
|
||||||
make -C doc html
|
|
||||||
env:
|
|
||||||
SPHINX_GIT_BRANCH: ${{ steps.extract_branch.outputs.branch }}
|
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
cmake_minimum_required(VERSION 3.18 FATAL_ERROR)
|
cmake_minimum_required(VERSION 3.18 FATAL_ERROR)
|
||||||
project(xgboost LANGUAGES CXX C VERSION 1.7.5)
|
project(xgboost LANGUAGES CXX C VERSION 1.7.6)
|
||||||
include(cmake/Utils.cmake)
|
include(cmake/Utils.cmake)
|
||||||
list(APPEND CMAKE_MODULE_PATH "${xgboost_SOURCE_DIR}/cmake/modules")
|
list(APPEND CMAKE_MODULE_PATH "${xgboost_SOURCE_DIR}/cmake/modules")
|
||||||
cmake_policy(SET CMP0022 NEW)
|
cmake_policy(SET CMP0022 NEW)
|
||||||
|
|||||||
@@ -1,8 +1,8 @@
|
|||||||
Package: xgboost
|
Package: xgboost
|
||||||
Type: Package
|
Type: Package
|
||||||
Title: Extreme Gradient Boosting
|
Title: Extreme Gradient Boosting
|
||||||
Version: 1.7.5.1
|
Version: 1.7.6.1
|
||||||
Date: 2023-03-29
|
Date: 2023-06-16
|
||||||
Authors@R: c(
|
Authors@R: c(
|
||||||
person("Tianqi", "Chen", role = c("aut"),
|
person("Tianqi", "Chen", role = c("aut"),
|
||||||
email = "tianqi.tchen@gmail.com"),
|
email = "tianqi.tchen@gmail.com"),
|
||||||
|
|||||||
18
R-package/configure
vendored
18
R-package/configure
vendored
@@ -1,6 +1,6 @@
|
|||||||
#! /bin/sh
|
#! /bin/sh
|
||||||
# Guess values for system-dependent variables and create Makefiles.
|
# Guess values for system-dependent variables and create Makefiles.
|
||||||
# Generated by GNU Autoconf 2.71 for xgboost 1.7.5.
|
# Generated by GNU Autoconf 2.71 for xgboost 1.7.6.
|
||||||
#
|
#
|
||||||
#
|
#
|
||||||
# Copyright (C) 1992-1996, 1998-2017, 2020-2021 Free Software Foundation,
|
# Copyright (C) 1992-1996, 1998-2017, 2020-2021 Free Software Foundation,
|
||||||
@@ -607,8 +607,8 @@ MAKEFLAGS=
|
|||||||
# Identity of this package.
|
# Identity of this package.
|
||||||
PACKAGE_NAME='xgboost'
|
PACKAGE_NAME='xgboost'
|
||||||
PACKAGE_TARNAME='xgboost'
|
PACKAGE_TARNAME='xgboost'
|
||||||
PACKAGE_VERSION='1.7.5'
|
PACKAGE_VERSION='1.7.6'
|
||||||
PACKAGE_STRING='xgboost 1.7.5'
|
PACKAGE_STRING='xgboost 1.7.6'
|
||||||
PACKAGE_BUGREPORT=''
|
PACKAGE_BUGREPORT=''
|
||||||
PACKAGE_URL=''
|
PACKAGE_URL=''
|
||||||
|
|
||||||
@@ -1225,7 +1225,7 @@ if test "$ac_init_help" = "long"; then
|
|||||||
# Omit some internal or obsolete options to make the list less imposing.
|
# Omit some internal or obsolete options to make the list less imposing.
|
||||||
# This message is too long to be a string in the A/UX 3.1 sh.
|
# This message is too long to be a string in the A/UX 3.1 sh.
|
||||||
cat <<_ACEOF
|
cat <<_ACEOF
|
||||||
\`configure' configures xgboost 1.7.5 to adapt to many kinds of systems.
|
\`configure' configures xgboost 1.7.6 to adapt to many kinds of systems.
|
||||||
|
|
||||||
Usage: $0 [OPTION]... [VAR=VALUE]...
|
Usage: $0 [OPTION]... [VAR=VALUE]...
|
||||||
|
|
||||||
@@ -1287,7 +1287,7 @@ fi
|
|||||||
|
|
||||||
if test -n "$ac_init_help"; then
|
if test -n "$ac_init_help"; then
|
||||||
case $ac_init_help in
|
case $ac_init_help in
|
||||||
short | recursive ) echo "Configuration of xgboost 1.7.5:";;
|
short | recursive ) echo "Configuration of xgboost 1.7.6:";;
|
||||||
esac
|
esac
|
||||||
cat <<\_ACEOF
|
cat <<\_ACEOF
|
||||||
|
|
||||||
@@ -1367,7 +1367,7 @@ fi
|
|||||||
test -n "$ac_init_help" && exit $ac_status
|
test -n "$ac_init_help" && exit $ac_status
|
||||||
if $ac_init_version; then
|
if $ac_init_version; then
|
||||||
cat <<\_ACEOF
|
cat <<\_ACEOF
|
||||||
xgboost configure 1.7.5
|
xgboost configure 1.7.6
|
||||||
generated by GNU Autoconf 2.71
|
generated by GNU Autoconf 2.71
|
||||||
|
|
||||||
Copyright (C) 2021 Free Software Foundation, Inc.
|
Copyright (C) 2021 Free Software Foundation, Inc.
|
||||||
@@ -1533,7 +1533,7 @@ cat >config.log <<_ACEOF
|
|||||||
This file contains any messages produced by compilers while
|
This file contains any messages produced by compilers while
|
||||||
running configure, to aid debugging if configure makes a mistake.
|
running configure, to aid debugging if configure makes a mistake.
|
||||||
|
|
||||||
It was created by xgboost $as_me 1.7.5, which was
|
It was created by xgboost $as_me 1.7.6, which was
|
||||||
generated by GNU Autoconf 2.71. Invocation command line was
|
generated by GNU Autoconf 2.71. Invocation command line was
|
||||||
|
|
||||||
$ $0$ac_configure_args_raw
|
$ $0$ac_configure_args_raw
|
||||||
@@ -3412,7 +3412,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
|
|||||||
# report actual input values of CONFIG_FILES etc. instead of their
|
# report actual input values of CONFIG_FILES etc. instead of their
|
||||||
# values after options handling.
|
# values after options handling.
|
||||||
ac_log="
|
ac_log="
|
||||||
This file was extended by xgboost $as_me 1.7.5, which was
|
This file was extended by xgboost $as_me 1.7.6, which was
|
||||||
generated by GNU Autoconf 2.71. Invocation command line was
|
generated by GNU Autoconf 2.71. Invocation command line was
|
||||||
|
|
||||||
CONFIG_FILES = $CONFIG_FILES
|
CONFIG_FILES = $CONFIG_FILES
|
||||||
@@ -3467,7 +3467,7 @@ ac_cs_config_escaped=`printf "%s\n" "$ac_cs_config" | sed "s/^ //; s/'/'\\\\\\\\
|
|||||||
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
|
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
|
||||||
ac_cs_config='$ac_cs_config_escaped'
|
ac_cs_config='$ac_cs_config_escaped'
|
||||||
ac_cs_version="\\
|
ac_cs_version="\\
|
||||||
xgboost config.status 1.7.5
|
xgboost config.status 1.7.6
|
||||||
configured by $0, generated by GNU Autoconf 2.71,
|
configured by $0, generated by GNU Autoconf 2.71,
|
||||||
with options \\"\$ac_cs_config\\"
|
with options \\"\$ac_cs_config\\"
|
||||||
|
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
AC_PREREQ(2.69)
|
AC_PREREQ(2.69)
|
||||||
|
|
||||||
AC_INIT([xgboost],[1.7.5],[],[xgboost],[])
|
AC_INIT([xgboost],[1.7.6],[],[xgboost],[])
|
||||||
|
|
||||||
: ${R_HOME=`R RHOME`}
|
: ${R_HOME=`R RHOME`}
|
||||||
if test -z "${R_HOME}"; then
|
if test -z "${R_HOME}"; then
|
||||||
|
|||||||
@@ -8,5 +8,5 @@ As a result it's changing quite often and we don't maintain its stability. Alon
|
|||||||
plugin system (see ``plugin/example`` in XGBoost's source tree), users can utilize some
|
plugin system (see ``plugin/example`` in XGBoost's source tree), users can utilize some
|
||||||
existing c++ headers for gaining more access to the internal of XGBoost.
|
existing c++ headers for gaining more access to the internal of XGBoost.
|
||||||
|
|
||||||
* `C++ interface documentation (latest master branch) <https://xgboost.readthedocs.io/en/latest/dev/files.html>`_
|
* `C++ interface documentation (latest master branch) <./dev/files.html>`_
|
||||||
* `C++ interface documentation (last stable release) <https://xgboost.readthedocs.io/en/stable/dev/files.html>`_
|
* `C++ interface documentation (last stable release) <https://xgboost.readthedocs.io/en/stable/dev/files.html>`_
|
||||||
|
|||||||
@@ -10,7 +10,7 @@ simply look at function comments in ``include/xgboost/c_api.h``. The reference i
|
|||||||
to sphinx with the help of breathe, which doesn't contain links to examples but might be
|
to sphinx with the help of breathe, which doesn't contain links to examples but might be
|
||||||
easier to read. For the original doxygen pages please visit:
|
easier to read. For the original doxygen pages please visit:
|
||||||
|
|
||||||
* `C API documentation (latest master branch) <https://xgboost.readthedocs.io/en/latest/dev/c__api_8h.html>`_
|
* `C API documentation (latest master branch) <./dev/c__api_8h.html>`_
|
||||||
* `C API documentation (last stable release) <https://xgboost.readthedocs.io/en/stable/dev/c__api_8h.html>`_
|
* `C API documentation (last stable release) <https://xgboost.readthedocs.io/en/stable/dev/c__api_8h.html>`_
|
||||||
|
|
||||||
***************
|
***************
|
||||||
|
|||||||
203
doc/conf.py
203
doc/conf.py
@@ -11,54 +11,107 @@
|
|||||||
#
|
#
|
||||||
# All configuration values have a default; values that are commented out
|
# All configuration values have a default; values that are commented out
|
||||||
# serve to show the default.
|
# serve to show the default.
|
||||||
from subprocess import call
|
|
||||||
from sh.contrib import git
|
|
||||||
import urllib.request
|
|
||||||
from urllib.error import HTTPError
|
|
||||||
import sys
|
|
||||||
import re
|
|
||||||
import os
|
import os
|
||||||
|
import re
|
||||||
|
import shutil
|
||||||
import subprocess
|
import subprocess
|
||||||
|
import sys
|
||||||
|
import tarfile
|
||||||
|
import urllib.request
|
||||||
|
import warnings
|
||||||
|
from urllib.error import HTTPError
|
||||||
|
|
||||||
git_branch = os.getenv('SPHINX_GIT_BRANCH', default=None)
|
from sh.contrib import git
|
||||||
|
|
||||||
|
CURR_PATH = os.path.dirname(os.path.abspath(os.path.expanduser(__file__)))
|
||||||
|
PROJECT_ROOT = os.path.normpath(os.path.join(CURR_PATH, os.path.pardir))
|
||||||
|
TMP_DIR = os.path.join(CURR_PATH, "tmp")
|
||||||
|
DOX_DIR = "doxygen"
|
||||||
|
|
||||||
|
|
||||||
|
def run_doxygen():
|
||||||
|
"""Run the doxygen make command in the designated folder."""
|
||||||
|
curdir = os.path.normpath(os.path.abspath(os.path.curdir))
|
||||||
|
if os.path.exists(TMP_DIR):
|
||||||
|
print(f"Delete directory {TMP_DIR}")
|
||||||
|
shutil.rmtree(TMP_DIR)
|
||||||
|
else:
|
||||||
|
print(f"Create directory {TMP_DIR}")
|
||||||
|
os.mkdir(TMP_DIR)
|
||||||
|
try:
|
||||||
|
os.chdir(PROJECT_ROOT)
|
||||||
|
if not os.path.exists(DOX_DIR):
|
||||||
|
os.mkdir(DOX_DIR)
|
||||||
|
os.chdir(os.path.join(PROJECT_ROOT, DOX_DIR))
|
||||||
|
print(
|
||||||
|
"Build doxygen at {}".format(
|
||||||
|
os.path.join(PROJECT_ROOT, DOX_DIR, "doc_doxygen")
|
||||||
|
)
|
||||||
|
)
|
||||||
|
subprocess.check_call(["cmake", "..", "-DBUILD_C_DOC=ON", "-GNinja"])
|
||||||
|
subprocess.check_call(["ninja", "doc_doxygen"])
|
||||||
|
|
||||||
|
src = os.path.join(PROJECT_ROOT, DOX_DIR, "doc_doxygen", "html")
|
||||||
|
dest = os.path.join(TMP_DIR, "dev")
|
||||||
|
print(f"Copy directory {src} -> {dest}")
|
||||||
|
shutil.copytree(src, dest)
|
||||||
|
except OSError as e:
|
||||||
|
sys.stderr.write("doxygen execution failed: %s" % e)
|
||||||
|
finally:
|
||||||
|
os.chdir(curdir)
|
||||||
|
|
||||||
|
|
||||||
|
def is_readthedocs_build():
|
||||||
|
if os.environ.get("READTHEDOCS", None) == "True":
|
||||||
|
return True
|
||||||
|
warnings.warn(
|
||||||
|
"Skipping Doxygen build... You won't have documentation for C/C++ functions. "
|
||||||
|
"Set environment variable READTHEDOCS=True if you want to build Doxygen. "
|
||||||
|
"(If you do opt in, make sure to install Doxygen, Graphviz, CMake, and C++ compiler "
|
||||||
|
"on your system.)"
|
||||||
|
)
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
if is_readthedocs_build():
|
||||||
|
run_doxygen()
|
||||||
|
|
||||||
|
|
||||||
|
git_branch = os.getenv("SPHINX_GIT_BRANCH", default=None)
|
||||||
if not git_branch:
|
if not git_branch:
|
||||||
# If SPHINX_GIT_BRANCH environment variable is not given, run git
|
# If SPHINX_GIT_BRANCH environment variable is not given, run git
|
||||||
# to determine branch name
|
# to determine branch name
|
||||||
git_branch = [
|
git_branch = [
|
||||||
re.sub(r'origin/', '', x.lstrip(' ')) for x in str(
|
re.sub(r"origin/", "", x.lstrip(" "))
|
||||||
git.branch('-r', '--contains', 'HEAD')).rstrip('\n').split('\n')
|
for x in str(git.branch("-r", "--contains", "HEAD")).rstrip("\n").split("\n")
|
||||||
]
|
]
|
||||||
git_branch = [x for x in git_branch if 'HEAD' not in x]
|
git_branch = [x for x in git_branch if "HEAD" not in x]
|
||||||
else:
|
else:
|
||||||
git_branch = [git_branch]
|
git_branch = [git_branch]
|
||||||
print('git_branch = {}'.format(git_branch[0]))
|
print("git_branch = {}".format(git_branch[0]))
|
||||||
|
|
||||||
try:
|
try:
|
||||||
filename, _ = urllib.request.urlretrieve(
|
filename, _ = urllib.request.urlretrieve(
|
||||||
'https://s3-us-west-2.amazonaws.com/xgboost-docs/{}.tar.bz2'.format(
|
f"https://s3-us-west-2.amazonaws.com/xgboost-docs/{git_branch[0]}.tar.bz2"
|
||||||
git_branch[0]))
|
)
|
||||||
call(
|
if not os.path.exists(TMP_DIR):
|
||||||
'if [ -d tmp ]; then rm -rf tmp; fi; mkdir -p tmp/jvm; cd tmp/jvm; tar xvf {}'
|
print(f"Create directory {TMP_DIR}")
|
||||||
.format(filename),
|
os.mkdir(TMP_DIR)
|
||||||
shell=True)
|
jvm_doc_dir = os.path.join(TMP_DIR, "jvm")
|
||||||
|
if os.path.exists(jvm_doc_dir):
|
||||||
|
print(f"Delete directory {jvm_doc_dir}")
|
||||||
|
shutil.rmtree(jvm_doc_dir)
|
||||||
|
print(f"Create directory {jvm_doc_dir}")
|
||||||
|
os.mkdir(jvm_doc_dir)
|
||||||
|
|
||||||
|
with tarfile.open(filename, "r:bz2") as t:
|
||||||
|
t.extractall(jvm_doc_dir)
|
||||||
except HTTPError:
|
except HTTPError:
|
||||||
print('JVM doc not found. Skipping...')
|
print("JVM doc not found. Skipping...")
|
||||||
try:
|
|
||||||
filename, _ = urllib.request.urlretrieve(
|
|
||||||
'https://s3-us-west-2.amazonaws.com/xgboost-docs/doxygen/{}.tar.bz2'.
|
|
||||||
format(git_branch[0]))
|
|
||||||
call(
|
|
||||||
'mkdir -p tmp/dev; cd tmp/dev; tar xvf {}; mv doc_doxygen/html/* .; rm -rf doc_doxygen'
|
|
||||||
.format(filename),
|
|
||||||
shell=True)
|
|
||||||
except HTTPError:
|
|
||||||
print('C API doc not found. Skipping...')
|
|
||||||
|
|
||||||
# If extensions (or modules to document with autodoc) are in another directory,
|
# If extensions (or modules to document with autodoc) are in another directory,
|
||||||
# add these directories to sys.path here. If the directory is relative to the
|
# add these directories to sys.path here. If the directory is relative to the
|
||||||
# documentation root, use os.path.abspath to make it absolute, like shown here.
|
# documentation root, use os.path.abspath to make it absolute, like shown here.
|
||||||
CURR_PATH = os.path.dirname(os.path.abspath(os.path.expanduser(__file__)))
|
|
||||||
PROJECT_ROOT = os.path.normpath(os.path.join(CURR_PATH, os.path.pardir))
|
|
||||||
libpath = os.path.join(PROJECT_ROOT, "python-package/")
|
libpath = os.path.join(PROJECT_ROOT, "python-package/")
|
||||||
sys.path.insert(0, libpath)
|
sys.path.insert(0, libpath)
|
||||||
sys.path.insert(0, CURR_PATH)
|
sys.path.insert(0, CURR_PATH)
|
||||||
@@ -81,50 +134,56 @@ release = xgboost.__version__
|
|||||||
# Add any Sphinx extension module names here, as strings. They can be
|
# Add any Sphinx extension module names here, as strings. They can be
|
||||||
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom ones
|
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom ones
|
||||||
extensions = [
|
extensions = [
|
||||||
'matplotlib.sphinxext.plot_directive',
|
"matplotlib.sphinxext.plot_directive",
|
||||||
'sphinx.ext.autodoc',
|
"sphinxcontrib.jquery",
|
||||||
'sphinx.ext.napoleon',
|
"sphinx.ext.autodoc",
|
||||||
'sphinx.ext.mathjax',
|
"sphinx.ext.napoleon",
|
||||||
'sphinx.ext.intersphinx',
|
"sphinx.ext.mathjax",
|
||||||
|
"sphinx.ext.intersphinx",
|
||||||
"sphinx_gallery.gen_gallery",
|
"sphinx_gallery.gen_gallery",
|
||||||
'breathe',
|
"breathe",
|
||||||
'recommonmark'
|
"recommonmark",
|
||||||
]
|
]
|
||||||
|
|
||||||
sphinx_gallery_conf = {
|
sphinx_gallery_conf = {
|
||||||
# path to your example scripts
|
# path to your example scripts
|
||||||
"examples_dirs": ["../demo/guide-python", "../demo/dask", "../demo/aft_survival"],
|
"examples_dirs": ["../demo/guide-python", "../demo/dask", "../demo/aft_survival"],
|
||||||
# path to where to save gallery generated output
|
# path to where to save gallery generated output
|
||||||
"gallery_dirs": ["python/examples", "python/dask-examples", "python/survival-examples"],
|
"gallery_dirs": [
|
||||||
|
"python/examples",
|
||||||
|
"python/dask-examples",
|
||||||
|
"python/survival-examples",
|
||||||
|
],
|
||||||
"matplotlib_animations": True,
|
"matplotlib_animations": True,
|
||||||
}
|
}
|
||||||
|
|
||||||
autodoc_typehints = "description"
|
autodoc_typehints = "description"
|
||||||
|
|
||||||
graphviz_output_format = 'png'
|
graphviz_output_format = "png"
|
||||||
plot_formats = [('svg', 300), ('png', 100), ('hires.png', 300)]
|
plot_formats = [("svg", 300), ("png", 100), ("hires.png", 300)]
|
||||||
plot_html_show_source_link = False
|
plot_html_show_source_link = False
|
||||||
plot_html_show_formats = False
|
plot_html_show_formats = False
|
||||||
|
|
||||||
# Breathe extension variables
|
# Breathe extension variables
|
||||||
DOX_DIR = "doxygen"
|
breathe_projects = {}
|
||||||
breathe_projects = {
|
if is_readthedocs_build():
|
||||||
"xgboost": os.path.join(PROJECT_ROOT, DOX_DIR, "doc_doxygen/xml")
|
breathe_projects = {
|
||||||
}
|
"xgboost": os.path.join(PROJECT_ROOT, DOX_DIR, "doc_doxygen/xml")
|
||||||
|
}
|
||||||
breathe_default_project = "xgboost"
|
breathe_default_project = "xgboost"
|
||||||
|
|
||||||
# Add any paths that contain templates here, relative to this directory.
|
# Add any paths that contain templates here, relative to this directory.
|
||||||
templates_path = ['_templates']
|
templates_path = ["_templates"]
|
||||||
|
|
||||||
# The suffix(es) of source filenames.
|
# The suffix(es) of source filenames.
|
||||||
# You can specify multiple suffix as a list of string:
|
# You can specify multiple suffix as a list of string:
|
||||||
source_suffix = ['.rst', '.md']
|
source_suffix = [".rst", ".md"]
|
||||||
|
|
||||||
# The encoding of source files.
|
# The encoding of source files.
|
||||||
# source_encoding = 'utf-8-sig'
|
# source_encoding = 'utf-8-sig'
|
||||||
|
|
||||||
# The master toctree document.
|
# The master toctree document.
|
||||||
master_doc = 'index'
|
master_doc = "index"
|
||||||
|
|
||||||
# The language for content autogenerated by Sphinx. Refer to documentation
|
# The language for content autogenerated by Sphinx. Refer to documentation
|
||||||
# for a list of supported languages.
|
# for a list of supported languages.
|
||||||
@@ -133,7 +192,7 @@ master_doc = 'index'
|
|||||||
# Usually you set "language" from the command line for these cases.
|
# Usually you set "language" from the command line for these cases.
|
||||||
language = "en"
|
language = "en"
|
||||||
|
|
||||||
autoclass_content = 'both'
|
autoclass_content = "both"
|
||||||
|
|
||||||
# There are two options for replacing |today|: either, you set today to some
|
# There are two options for replacing |today|: either, you set today to some
|
||||||
# non-false value, then it is used:
|
# non-false value, then it is used:
|
||||||
@@ -143,8 +202,10 @@ autoclass_content = 'both'
|
|||||||
|
|
||||||
# List of patterns, relative to source directory, that match files and
|
# List of patterns, relative to source directory, that match files and
|
||||||
# directories to ignore when looking for source files.
|
# directories to ignore when looking for source files.
|
||||||
exclude_patterns = ['_build']
|
exclude_patterns = ["_build"]
|
||||||
html_extra_path = ['./tmp']
|
html_extra_path = []
|
||||||
|
if is_readthedocs_build():
|
||||||
|
html_extra_path = [TMP_DIR]
|
||||||
|
|
||||||
# The reST default role (used for this markup: `text`) to use for all
|
# The reST default role (used for this markup: `text`) to use for all
|
||||||
# documents.
|
# documents.
|
||||||
@@ -162,7 +223,7 @@ html_extra_path = ['./tmp']
|
|||||||
# show_authors = False
|
# show_authors = False
|
||||||
|
|
||||||
# The name of the Pygments (syntax highlighting) style to use.
|
# The name of the Pygments (syntax highlighting) style to use.
|
||||||
pygments_style = 'sphinx'
|
pygments_style = "sphinx"
|
||||||
|
|
||||||
# A list of ignored prefixes for module index sorting.
|
# A list of ignored prefixes for module index sorting.
|
||||||
# modindex_common_prefix = []
|
# modindex_common_prefix = []
|
||||||
@@ -185,27 +246,24 @@ html_logo = "https://raw.githubusercontent.com/dmlc/dmlc.github.io/master/img/lo
|
|||||||
|
|
||||||
html_css_files = ["css/custom.css"]
|
html_css_files = ["css/custom.css"]
|
||||||
|
|
||||||
html_sidebars = {
|
html_sidebars = {"**": ["logo-text.html", "globaltoc.html", "searchbox.html"]}
|
||||||
'**': ['logo-text.html', 'globaltoc.html', 'searchbox.html']
|
|
||||||
}
|
|
||||||
|
|
||||||
# Add any paths that contain custom static files (such as style sheets) here,
|
# Add any paths that contain custom static files (such as style sheets) here,
|
||||||
# relative to this directory. They are copied after the builtin static files,
|
# relative to this directory. They are copied after the builtin static files,
|
||||||
# so a file named "default.css" will overwrite the builtin "default.css".
|
# so a file named "default.css" will overwrite the builtin "default.css".
|
||||||
html_static_path = ['_static']
|
html_static_path = ["_static"]
|
||||||
|
|
||||||
# Output file base name for HTML help builder.
|
# Output file base name for HTML help builder.
|
||||||
htmlhelp_basename = project + 'doc'
|
htmlhelp_basename = project + "doc"
|
||||||
|
|
||||||
# -- Options for LaTeX output ---------------------------------------------
|
# -- Options for LaTeX output ---------------------------------------------
|
||||||
latex_elements = {
|
latex_elements = {}
|
||||||
}
|
|
||||||
|
|
||||||
# Grouping the document tree into LaTeX files. List of tuples
|
# Grouping the document tree into LaTeX files. List of tuples
|
||||||
# (source start file, target name, title,
|
# (source start file, target name, title,
|
||||||
# author, documentclass [howto, manual, or own class]).
|
# author, documentclass [howto, manual, or own class]).
|
||||||
latex_documents = [
|
latex_documents = [
|
||||||
(master_doc, '%s.tex' % project, project, author, 'manual'),
|
(master_doc, "%s.tex" % project, project, author, "manual"),
|
||||||
]
|
]
|
||||||
|
|
||||||
intersphinx_mapping = {
|
intersphinx_mapping = {
|
||||||
@@ -220,30 +278,5 @@ intersphinx_mapping = {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
# hook for doxygen
|
|
||||||
def run_doxygen():
|
|
||||||
"""Run the doxygen make command in the designated folder."""
|
|
||||||
curdir = os.path.normpath(os.path.abspath(os.path.curdir))
|
|
||||||
try:
|
|
||||||
os.chdir(PROJECT_ROOT)
|
|
||||||
if not os.path.exists(DOX_DIR):
|
|
||||||
os.mkdir(DOX_DIR)
|
|
||||||
os.chdir(os.path.join(PROJECT_ROOT, DOX_DIR))
|
|
||||||
subprocess.check_call(["cmake", "..", "-DBUILD_C_DOC=ON", "-GNinja"])
|
|
||||||
subprocess.check_call(["ninja", "doc_doxygen"])
|
|
||||||
except OSError as e:
|
|
||||||
sys.stderr.write("doxygen execution failed: %s" % e)
|
|
||||||
finally:
|
|
||||||
os.chdir(curdir)
|
|
||||||
|
|
||||||
|
|
||||||
def generate_doxygen_xml(app):
|
|
||||||
"""Run the doxygen make commands if we're on the ReadTheDocs server"""
|
|
||||||
read_the_docs_build = os.environ.get('READTHEDOCS', None) == 'True'
|
|
||||||
if read_the_docs_build:
|
|
||||||
run_doxygen()
|
|
||||||
|
|
||||||
|
|
||||||
def setup(app):
|
def setup(app):
|
||||||
app.add_css_file('custom.css')
|
app.add_css_file("custom.css")
|
||||||
app.connect("builder-inited", generate_doxygen_xml)
|
|
||||||
|
|||||||
@@ -107,8 +107,8 @@ virtualenv and pip:
|
|||||||
python -m venv xgboost_env
|
python -m venv xgboost_env
|
||||||
source xgboost_env/bin/activate
|
source xgboost_env/bin/activate
|
||||||
pip install pyarrow pandas venv-pack xgboost
|
pip install pyarrow pandas venv-pack xgboost
|
||||||
# https://rapids.ai/pip.html#install
|
# https://docs.rapids.ai/install#pip-install
|
||||||
pip install cudf-cu11 --extra-index-url=https://pypi.ngc.nvidia.com
|
pip install cudf-cu11 --extra-index-url=https://pypi.nvidia.com
|
||||||
venv-pack -o xgboost_env.tar.gz
|
venv-pack -o xgboost_env.tar.gz
|
||||||
|
|
||||||
With Conda:
|
With Conda:
|
||||||
@@ -240,7 +240,7 @@ additional spark configurations and dependencies:
|
|||||||
--master spark://<master-ip>:7077 \
|
--master spark://<master-ip>:7077 \
|
||||||
--conf spark.executor.resource.gpu.amount=1 \
|
--conf spark.executor.resource.gpu.amount=1 \
|
||||||
--conf spark.task.resource.gpu.amount=1 \
|
--conf spark.task.resource.gpu.amount=1 \
|
||||||
--packages com.nvidia:rapids-4-spark_2.12:22.08.0 \
|
--packages com.nvidia:rapids-4-spark_2.12:23.04.0 \
|
||||||
--conf spark.plugins=com.nvidia.spark.SQLPlugin \
|
--conf spark.plugins=com.nvidia.spark.SQLPlugin \
|
||||||
--conf spark.sql.execution.arrow.maxRecordsPerBatch=1000000 \
|
--conf spark.sql.execution.arrow.maxRecordsPerBatch=1000000 \
|
||||||
--archives xgboost_env.tar.gz#environment \
|
--archives xgboost_env.tar.gz#environment \
|
||||||
|
|||||||
@@ -508,7 +508,7 @@ class RegTree : public Model {
|
|||||||
* \brief drop the trace after fill, must be called after fill.
|
* \brief drop the trace after fill, must be called after fill.
|
||||||
* \param inst The sparse instance to drop.
|
* \param inst The sparse instance to drop.
|
||||||
*/
|
*/
|
||||||
void Drop(const SparsePage::Inst& inst);
|
void Drop();
|
||||||
/*!
|
/*!
|
||||||
* \brief returns the size of the feature vector
|
* \brief returns the size of the feature vector
|
||||||
* \return the size of the feature vector
|
* \return the size of the feature vector
|
||||||
@@ -709,13 +709,10 @@ inline void RegTree::FVec::Fill(const SparsePage::Inst& inst) {
|
|||||||
has_missing_ = data_.size() != feature_count;
|
has_missing_ = data_.size() != feature_count;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline void RegTree::FVec::Drop(const SparsePage::Inst& inst) {
|
inline void RegTree::FVec::Drop() {
|
||||||
for (auto const& entry : inst) {
|
Entry e{};
|
||||||
if (entry.index >= data_.size()) {
|
e.flag = -1;
|
||||||
continue;
|
std::fill_n(data_.data(), data_.size(), e);
|
||||||
}
|
|
||||||
data_[entry.index].flag = -1;
|
|
||||||
}
|
|
||||||
has_missing_ = true;
|
has_missing_ = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -6,6 +6,6 @@
|
|||||||
|
|
||||||
#define XGBOOST_VER_MAJOR 1
|
#define XGBOOST_VER_MAJOR 1
|
||||||
#define XGBOOST_VER_MINOR 7
|
#define XGBOOST_VER_MINOR 7
|
||||||
#define XGBOOST_VER_PATCH 5
|
#define XGBOOST_VER_PATCH 6
|
||||||
|
|
||||||
#endif // XGBOOST_VERSION_CONFIG_H_
|
#endif // XGBOOST_VERSION_CONFIG_H_
|
||||||
|
|||||||
@@ -6,7 +6,7 @@
|
|||||||
|
|
||||||
<groupId>ml.dmlc</groupId>
|
<groupId>ml.dmlc</groupId>
|
||||||
<artifactId>xgboost-jvm_2.12</artifactId>
|
<artifactId>xgboost-jvm_2.12</artifactId>
|
||||||
<version>1.7.5</version>
|
<version>1.7.6</version>
|
||||||
<packaging>pom</packaging>
|
<packaging>pom</packaging>
|
||||||
<name>XGBoost JVM Package</name>
|
<name>XGBoost JVM Package</name>
|
||||||
<description>JVM Package for XGBoost</description>
|
<description>JVM Package for XGBoost</description>
|
||||||
|
|||||||
@@ -6,10 +6,10 @@
|
|||||||
<parent>
|
<parent>
|
||||||
<groupId>ml.dmlc</groupId>
|
<groupId>ml.dmlc</groupId>
|
||||||
<artifactId>xgboost-jvm_2.12</artifactId>
|
<artifactId>xgboost-jvm_2.12</artifactId>
|
||||||
<version>1.7.5</version>
|
<version>1.7.6</version>
|
||||||
</parent>
|
</parent>
|
||||||
<artifactId>xgboost4j-example_2.12</artifactId>
|
<artifactId>xgboost4j-example_2.12</artifactId>
|
||||||
<version>1.7.5</version>
|
<version>1.7.6</version>
|
||||||
<packaging>jar</packaging>
|
<packaging>jar</packaging>
|
||||||
<build>
|
<build>
|
||||||
<plugins>
|
<plugins>
|
||||||
@@ -26,7 +26,7 @@
|
|||||||
<dependency>
|
<dependency>
|
||||||
<groupId>ml.dmlc</groupId>
|
<groupId>ml.dmlc</groupId>
|
||||||
<artifactId>xgboost4j-spark_${scala.binary.version}</artifactId>
|
<artifactId>xgboost4j-spark_${scala.binary.version}</artifactId>
|
||||||
<version>1.7.5</version>
|
<version>1.7.6</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.spark</groupId>
|
<groupId>org.apache.spark</groupId>
|
||||||
@@ -37,7 +37,7 @@
|
|||||||
<dependency>
|
<dependency>
|
||||||
<groupId>ml.dmlc</groupId>
|
<groupId>ml.dmlc</groupId>
|
||||||
<artifactId>xgboost4j-flink_${scala.binary.version}</artifactId>
|
<artifactId>xgboost4j-flink_${scala.binary.version}</artifactId>
|
||||||
<version>1.7.5</version>
|
<version>1.7.6</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.commons</groupId>
|
<groupId>org.apache.commons</groupId>
|
||||||
|
|||||||
@@ -6,10 +6,10 @@
|
|||||||
<parent>
|
<parent>
|
||||||
<groupId>ml.dmlc</groupId>
|
<groupId>ml.dmlc</groupId>
|
||||||
<artifactId>xgboost-jvm_2.12</artifactId>
|
<artifactId>xgboost-jvm_2.12</artifactId>
|
||||||
<version>1.7.5</version>
|
<version>1.7.6</version>
|
||||||
</parent>
|
</parent>
|
||||||
<artifactId>xgboost4j-flink_2.12</artifactId>
|
<artifactId>xgboost4j-flink_2.12</artifactId>
|
||||||
<version>1.7.5</version>
|
<version>1.7.6</version>
|
||||||
<build>
|
<build>
|
||||||
<plugins>
|
<plugins>
|
||||||
<plugin>
|
<plugin>
|
||||||
@@ -26,7 +26,7 @@
|
|||||||
<dependency>
|
<dependency>
|
||||||
<groupId>ml.dmlc</groupId>
|
<groupId>ml.dmlc</groupId>
|
||||||
<artifactId>xgboost4j_${scala.binary.version}</artifactId>
|
<artifactId>xgboost4j_${scala.binary.version}</artifactId>
|
||||||
<version>1.7.5</version>
|
<version>1.7.6</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.commons</groupId>
|
<groupId>org.apache.commons</groupId>
|
||||||
|
|||||||
@@ -6,10 +6,10 @@
|
|||||||
<parent>
|
<parent>
|
||||||
<groupId>ml.dmlc</groupId>
|
<groupId>ml.dmlc</groupId>
|
||||||
<artifactId>xgboost-jvm_2.12</artifactId>
|
<artifactId>xgboost-jvm_2.12</artifactId>
|
||||||
<version>1.7.5</version>
|
<version>1.7.6</version>
|
||||||
</parent>
|
</parent>
|
||||||
<artifactId>xgboost4j-gpu_2.12</artifactId>
|
<artifactId>xgboost4j-gpu_2.12</artifactId>
|
||||||
<version>1.7.5</version>
|
<version>1.7.6</version>
|
||||||
<packaging>jar</packaging>
|
<packaging>jar</packaging>
|
||||||
|
|
||||||
<dependencies>
|
<dependencies>
|
||||||
|
|||||||
@@ -6,7 +6,7 @@
|
|||||||
<parent>
|
<parent>
|
||||||
<groupId>ml.dmlc</groupId>
|
<groupId>ml.dmlc</groupId>
|
||||||
<artifactId>xgboost-jvm_2.12</artifactId>
|
<artifactId>xgboost-jvm_2.12</artifactId>
|
||||||
<version>1.7.5</version>
|
<version>1.7.6</version>
|
||||||
</parent>
|
</parent>
|
||||||
<artifactId>xgboost4j-spark-gpu_2.12</artifactId>
|
<artifactId>xgboost4j-spark-gpu_2.12</artifactId>
|
||||||
<build>
|
<build>
|
||||||
@@ -24,7 +24,7 @@
|
|||||||
<dependency>
|
<dependency>
|
||||||
<groupId>ml.dmlc</groupId>
|
<groupId>ml.dmlc</groupId>
|
||||||
<artifactId>xgboost4j-gpu_${scala.binary.version}</artifactId>
|
<artifactId>xgboost4j-gpu_${scala.binary.version}</artifactId>
|
||||||
<version>1.7.5</version>
|
<version>1.7.6</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.spark</groupId>
|
<groupId>org.apache.spark</groupId>
|
||||||
|
|||||||
@@ -6,7 +6,7 @@
|
|||||||
<parent>
|
<parent>
|
||||||
<groupId>ml.dmlc</groupId>
|
<groupId>ml.dmlc</groupId>
|
||||||
<artifactId>xgboost-jvm_2.12</artifactId>
|
<artifactId>xgboost-jvm_2.12</artifactId>
|
||||||
<version>1.7.5</version>
|
<version>1.7.6</version>
|
||||||
</parent>
|
</parent>
|
||||||
<artifactId>xgboost4j-spark_2.12</artifactId>
|
<artifactId>xgboost4j-spark_2.12</artifactId>
|
||||||
<build>
|
<build>
|
||||||
@@ -24,7 +24,7 @@
|
|||||||
<dependency>
|
<dependency>
|
||||||
<groupId>ml.dmlc</groupId>
|
<groupId>ml.dmlc</groupId>
|
||||||
<artifactId>xgboost4j_${scala.binary.version}</artifactId>
|
<artifactId>xgboost4j_${scala.binary.version}</artifactId>
|
||||||
<version>1.7.5</version>
|
<version>1.7.6</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.spark</groupId>
|
<groupId>org.apache.spark</groupId>
|
||||||
|
|||||||
@@ -6,10 +6,10 @@
|
|||||||
<parent>
|
<parent>
|
||||||
<groupId>ml.dmlc</groupId>
|
<groupId>ml.dmlc</groupId>
|
||||||
<artifactId>xgboost-jvm_2.12</artifactId>
|
<artifactId>xgboost-jvm_2.12</artifactId>
|
||||||
<version>1.7.5</version>
|
<version>1.7.6</version>
|
||||||
</parent>
|
</parent>
|
||||||
<artifactId>xgboost4j_2.12</artifactId>
|
<artifactId>xgboost4j_2.12</artifactId>
|
||||||
<version>1.7.5</version>
|
<version>1.7.6</version>
|
||||||
<packaging>jar</packaging>
|
<packaging>jar</packaging>
|
||||||
|
|
||||||
<dependencies>
|
<dependencies>
|
||||||
|
|||||||
@@ -1 +1 @@
|
|||||||
1.7.5
|
1.7.6
|
||||||
|
|||||||
@@ -866,7 +866,11 @@ class _SparkXGBEstimator(Estimator, _SparkXGBParams, MLReadable, MLWritable):
|
|||||||
result_xgb_model = self._convert_to_sklearn_model(
|
result_xgb_model = self._convert_to_sklearn_model(
|
||||||
bytearray(booster, "utf-8"), config
|
bytearray(booster, "utf-8"), config
|
||||||
)
|
)
|
||||||
return self._copyValues(self._create_pyspark_model(result_xgb_model))
|
spark_model = self._create_pyspark_model(result_xgb_model)
|
||||||
|
# According to pyspark ML convention, the model uid should be the same
|
||||||
|
# with estimator uid.
|
||||||
|
spark_model._resetUid(self.uid)
|
||||||
|
return self._copyValues(spark_model)
|
||||||
|
|
||||||
def write(self):
|
def write(self):
|
||||||
"""
|
"""
|
||||||
|
|||||||
@@ -149,10 +149,28 @@ common::ColumnMatrix const &GHistIndexMatrix::Transpose() const {
|
|||||||
return *columns_;
|
return *columns_;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bst_bin_t GHistIndexMatrix::GetGindex(size_t ridx, size_t fidx) const {
|
||||||
|
auto begin = RowIdx(ridx);
|
||||||
|
if (IsDense()) {
|
||||||
|
return static_cast<bst_bin_t>(index[begin + fidx]);
|
||||||
|
}
|
||||||
|
auto end = RowIdx(ridx + 1);
|
||||||
|
auto const& cut_ptrs = cut.Ptrs();
|
||||||
|
auto f_begin = cut_ptrs[fidx];
|
||||||
|
auto f_end = cut_ptrs[fidx + 1];
|
||||||
|
return BinarySearchBin(begin, end, index, f_begin, f_end);
|
||||||
|
}
|
||||||
|
|
||||||
float GHistIndexMatrix::GetFvalue(size_t ridx, size_t fidx, bool is_cat) const {
|
float GHistIndexMatrix::GetFvalue(size_t ridx, size_t fidx, bool is_cat) const {
|
||||||
auto const &values = cut.Values();
|
auto const &values = cut.Values();
|
||||||
auto const &mins = cut.MinValues();
|
auto const &mins = cut.MinValues();
|
||||||
auto const &ptrs = cut.Ptrs();
|
auto const &ptrs = cut.Ptrs();
|
||||||
|
return this->GetFvalue(ptrs, values, mins, ridx, fidx, is_cat);
|
||||||
|
}
|
||||||
|
|
||||||
|
float GHistIndexMatrix::GetFvalue(std::vector<std::uint32_t> const &ptrs,
|
||||||
|
std::vector<float> const &values, std::vector<float> const &mins,
|
||||||
|
bst_row_t ridx, bst_feature_t fidx, bool is_cat) const {
|
||||||
if (is_cat) {
|
if (is_cat) {
|
||||||
auto f_begin = ptrs[fidx];
|
auto f_begin = ptrs[fidx];
|
||||||
auto f_end = ptrs[fidx + 1];
|
auto f_end = ptrs[fidx + 1];
|
||||||
@@ -172,24 +190,27 @@ float GHistIndexMatrix::GetFvalue(size_t ridx, size_t fidx, bool is_cat) const {
|
|||||||
}
|
}
|
||||||
return common::HistogramCuts::NumericBinValue(ptrs, values, mins, fidx, bin_idx);
|
return common::HistogramCuts::NumericBinValue(ptrs, values, mins, fidx, bin_idx);
|
||||||
};
|
};
|
||||||
|
switch (columns_->GetColumnType(fidx)) {
|
||||||
if (columns_->GetColumnType(fidx) == common::kDenseColumn) {
|
case common::kDenseColumn: {
|
||||||
if (columns_->AnyMissing()) {
|
if (columns_->AnyMissing()) {
|
||||||
|
return common::DispatchBinType(columns_->GetTypeSize(), [&](auto dtype) {
|
||||||
|
auto column = columns_->DenseColumn<decltype(dtype), true>(fidx);
|
||||||
|
return get_bin_val(column);
|
||||||
|
});
|
||||||
|
} else {
|
||||||
|
return common::DispatchBinType(columns_->GetTypeSize(), [&](auto dtype) {
|
||||||
|
auto column = columns_->DenseColumn<decltype(dtype), false>(fidx);
|
||||||
|
auto bin_idx = column[ridx];
|
||||||
|
return common::HistogramCuts::NumericBinValue(ptrs, values, mins, fidx, bin_idx);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
case common::kSparseColumn: {
|
||||||
return common::DispatchBinType(columns_->GetTypeSize(), [&](auto dtype) {
|
return common::DispatchBinType(columns_->GetTypeSize(), [&](auto dtype) {
|
||||||
auto column = columns_->DenseColumn<decltype(dtype), true>(fidx);
|
auto column = columns_->SparseColumn<decltype(dtype)>(fidx, 0);
|
||||||
return get_bin_val(column);
|
|
||||||
});
|
|
||||||
} else {
|
|
||||||
return common::DispatchBinType(columns_->GetTypeSize(), [&](auto dtype) {
|
|
||||||
auto column = columns_->DenseColumn<decltype(dtype), false>(fidx);
|
|
||||||
return get_bin_val(column);
|
return get_bin_val(column);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
} else {
|
|
||||||
return common::DispatchBinType(columns_->GetTypeSize(), [&](auto dtype) {
|
|
||||||
auto column = columns_->SparseColumn<decltype(dtype)>(fidx, 0);
|
|
||||||
return get_bin_val(column);
|
|
||||||
});
|
|
||||||
}
|
}
|
||||||
|
|
||||||
SPAN_CHECK(false);
|
SPAN_CHECK(false);
|
||||||
|
|||||||
@@ -227,7 +227,12 @@ class GHistIndexMatrix {
|
|||||||
|
|
||||||
common::ColumnMatrix const& Transpose() const;
|
common::ColumnMatrix const& Transpose() const;
|
||||||
|
|
||||||
|
bst_bin_t GetGindex(size_t ridx, size_t fidx) const;
|
||||||
|
|
||||||
float GetFvalue(size_t ridx, size_t fidx, bool is_cat) const;
|
float GetFvalue(size_t ridx, size_t fidx, bool is_cat) const;
|
||||||
|
float GetFvalue(std::vector<std::uint32_t> const& ptrs, std::vector<float> const& values,
|
||||||
|
std::vector<float> const& mins, bst_row_t ridx, bst_feature_t fidx,
|
||||||
|
bool is_cat) const;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
std::unique_ptr<common::ColumnMatrix> columns_;
|
std::unique_ptr<common::ColumnMatrix> columns_;
|
||||||
|
|||||||
@@ -63,7 +63,7 @@ bst_float PredValue(const SparsePage::Inst &inst,
|
|||||||
psum += (*trees[i])[nidx].LeafValue();
|
psum += (*trees[i])[nidx].LeafValue();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
p_feats->Drop(inst);
|
p_feats->Drop();
|
||||||
return psum;
|
return psum;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -116,13 +116,11 @@ void FVecFill(const size_t block_size, const size_t batch_offset, const int num_
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename DataView>
|
void FVecDrop(std::size_t const block_size, std::size_t const fvec_offset,
|
||||||
void FVecDrop(const size_t block_size, const size_t batch_offset, DataView* batch,
|
std::vector<RegTree::FVec> *p_feats) {
|
||||||
const size_t fvec_offset, std::vector<RegTree::FVec>* p_feats) {
|
|
||||||
for (size_t i = 0; i < block_size; ++i) {
|
for (size_t i = 0; i < block_size; ++i) {
|
||||||
RegTree::FVec &feats = (*p_feats)[fvec_offset + i];
|
RegTree::FVec &feats = (*p_feats)[fvec_offset + i];
|
||||||
const SparsePage::Inst inst = (*batch)[batch_offset + i];
|
feats.Drop();
|
||||||
feats.Drop(inst);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -142,11 +140,15 @@ struct SparsePageView {
|
|||||||
struct GHistIndexMatrixView {
|
struct GHistIndexMatrixView {
|
||||||
private:
|
private:
|
||||||
GHistIndexMatrix const &page_;
|
GHistIndexMatrix const &page_;
|
||||||
uint64_t n_features_;
|
std::uint64_t const n_features_;
|
||||||
common::Span<FeatureType const> ft_;
|
common::Span<FeatureType const> ft_;
|
||||||
common::Span<Entry> workspace_;
|
common::Span<Entry> workspace_;
|
||||||
std::vector<size_t> current_unroll_;
|
std::vector<size_t> current_unroll_;
|
||||||
|
|
||||||
|
std::vector<std::uint32_t> const& ptrs_;
|
||||||
|
std::vector<float> const& mins_;
|
||||||
|
std::vector<float> const& values_;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
size_t base_rowid;
|
size_t base_rowid;
|
||||||
|
|
||||||
@@ -159,6 +161,9 @@ struct GHistIndexMatrixView {
|
|||||||
ft_{ft},
|
ft_{ft},
|
||||||
workspace_{workplace},
|
workspace_{workplace},
|
||||||
current_unroll_(n_threads > 0 ? n_threads : 1, 0),
|
current_unroll_(n_threads > 0 ? n_threads : 1, 0),
|
||||||
|
ptrs_{_page.cut.Ptrs()},
|
||||||
|
mins_{_page.cut.MinValues()},
|
||||||
|
values_{_page.cut.Values()},
|
||||||
base_rowid{_page.base_rowid} {}
|
base_rowid{_page.base_rowid} {}
|
||||||
|
|
||||||
SparsePage::Inst operator[](size_t r) {
|
SparsePage::Inst operator[](size_t r) {
|
||||||
@@ -167,7 +172,7 @@ struct GHistIndexMatrixView {
|
|||||||
size_t non_missing{beg};
|
size_t non_missing{beg};
|
||||||
|
|
||||||
for (bst_feature_t c = 0; c < n_features_; ++c) {
|
for (bst_feature_t c = 0; c < n_features_; ++c) {
|
||||||
float f = page_.GetFvalue(r, c, common::IsCat(ft_, c));
|
float f = page_.GetFvalue(ptrs_, values_, mins_, r, c, common::IsCat(ft_, c));
|
||||||
if (!common::CheckNAN(f)) {
|
if (!common::CheckNAN(f)) {
|
||||||
workspace_[non_missing] = Entry{c, f};
|
workspace_[non_missing] = Entry{c, f};
|
||||||
++non_missing;
|
++non_missing;
|
||||||
@@ -250,10 +255,9 @@ void PredictBatchByBlockOfRowsKernel(
|
|||||||
FVecFill(block_size, batch_offset, num_feature, &batch, fvec_offset,
|
FVecFill(block_size, batch_offset, num_feature, &batch, fvec_offset,
|
||||||
p_thread_temp);
|
p_thread_temp);
|
||||||
// process block of rows through all trees to keep cache locality
|
// process block of rows through all trees to keep cache locality
|
||||||
PredictByAllTrees(model, tree_begin, tree_end, out_preds,
|
PredictByAllTrees(model, tree_begin, tree_end, out_preds, batch_offset + batch.base_rowid,
|
||||||
batch_offset + batch.base_rowid, num_group, thread_temp,
|
num_group, thread_temp, fvec_offset, block_size);
|
||||||
fvec_offset, block_size);
|
FVecDrop(block_size, fvec_offset, p_thread_temp);
|
||||||
FVecDrop(block_size, batch_offset, &batch, fvec_offset, p_thread_temp);
|
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -470,7 +474,7 @@ class CPUPredictor : public Predictor {
|
|||||||
bst_node_t tid = GetLeafIndex<true, true>(tree, feats, cats);
|
bst_node_t tid = GetLeafIndex<true, true>(tree, feats, cats);
|
||||||
preds[ridx * ntree_limit + j] = static_cast<bst_float>(tid);
|
preds[ridx * ntree_limit + j] = static_cast<bst_float>(tid);
|
||||||
}
|
}
|
||||||
feats.Drop(page[i]);
|
feats.Drop();
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -544,7 +548,7 @@ class CPUPredictor : public Predictor {
|
|||||||
(tree_weights == nullptr ? 1 : (*tree_weights)[j]);
|
(tree_weights == nullptr ? 1 : (*tree_weights)[j]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
feats.Drop(page[i]);
|
feats.Drop();
|
||||||
// add base margin to BIAS
|
// add base margin to BIAS
|
||||||
if (base_margin.Size() != 0) {
|
if (base_margin.Size() != 0) {
|
||||||
CHECK_EQ(base_margin.Shape(1), ngroup);
|
CHECK_EQ(base_margin.Shape(1), ngroup);
|
||||||
|
|||||||
@@ -389,6 +389,7 @@ class HistEvaluator {
|
|||||||
tree_evaluator_.AddSplit(candidate.nid, left_child, right_child,
|
tree_evaluator_.AddSplit(candidate.nid, left_child, right_child,
|
||||||
tree[candidate.nid].SplitIndex(), left_weight,
|
tree[candidate.nid].SplitIndex(), left_weight,
|
||||||
right_weight);
|
right_weight);
|
||||||
|
evaluator = tree_evaluator_.GetEvaluator();
|
||||||
|
|
||||||
auto max_node = std::max(left_child, tree[candidate.nid].RightChild());
|
auto max_node = std::max(left_child, tree[candidate.nid].RightChild());
|
||||||
max_node = std::max(candidate.nid, max_node);
|
max_node = std::max(candidate.nid, max_node);
|
||||||
|
|||||||
@@ -48,6 +48,8 @@ class TreeEvaluator {
|
|||||||
monotone_.HostVector().resize(n_features, 0);
|
monotone_.HostVector().resize(n_features, 0);
|
||||||
has_constraint_ = false;
|
has_constraint_ = false;
|
||||||
} else {
|
} else {
|
||||||
|
CHECK_LE(p.monotone_constraints.size(), n_features)
|
||||||
|
<< "The size of monotone constraint should be less or equal to the number of features.";
|
||||||
monotone_.HostVector() = p.monotone_constraints;
|
monotone_.HostVector() = p.monotone_constraints;
|
||||||
monotone_.HostVector().resize(n_features, 0);
|
monotone_.HostVector().resize(n_features, 0);
|
||||||
// Initialised to some small size, can grow if needed
|
// Initialised to some small size, can grow if needed
|
||||||
|
|||||||
@@ -286,7 +286,7 @@ struct GPUHistMakerDevice {
|
|||||||
matrix.feature_segments,
|
matrix.feature_segments,
|
||||||
matrix.gidx_fvalue_map,
|
matrix.gidx_fvalue_map,
|
||||||
matrix.min_fvalue,
|
matrix.min_fvalue,
|
||||||
matrix.is_dense
|
matrix.is_dense && !collective::IsDistributed()
|
||||||
};
|
};
|
||||||
auto split = this->evaluator_.EvaluateSingleSplit(inputs, shared_inputs);
|
auto split = this->evaluator_.EvaluateSingleSplit(inputs, shared_inputs);
|
||||||
return split;
|
return split;
|
||||||
@@ -300,11 +300,11 @@ struct GPUHistMakerDevice {
|
|||||||
std::vector<bst_node_t> nidx(2 * candidates.size());
|
std::vector<bst_node_t> nidx(2 * candidates.size());
|
||||||
auto h_node_inputs = pinned2.GetSpan<EvaluateSplitInputs>(2 * candidates.size());
|
auto h_node_inputs = pinned2.GetSpan<EvaluateSplitInputs>(2 * candidates.size());
|
||||||
auto matrix = page->GetDeviceAccessor(ctx_->gpu_id);
|
auto matrix = page->GetDeviceAccessor(ctx_->gpu_id);
|
||||||
EvaluateSplitSharedInputs shared_inputs{
|
EvaluateSplitSharedInputs shared_inputs{GPUTrainingParam{param}, *quantiser, feature_types,
|
||||||
GPUTrainingParam{param}, *quantiser, feature_types, matrix.feature_segments,
|
matrix.feature_segments, matrix.gidx_fvalue_map,
|
||||||
matrix.gidx_fvalue_map, matrix.min_fvalue,
|
matrix.min_fvalue,
|
||||||
matrix.is_dense
|
// is_dense represents the local data
|
||||||
};
|
matrix.is_dense && !collective::IsDistributed()};
|
||||||
dh::TemporaryArray<GPUExpandEntry> entries(2 * candidates.size());
|
dh::TemporaryArray<GPUExpandEntry> entries(2 * candidates.size());
|
||||||
// Store the feature set ptrs so they dont go out of scope before the kernel is called
|
// Store the feature set ptrs so they dont go out of scope before the kernel is called
|
||||||
std::vector<std::shared_ptr<HostDeviceVector<bst_feature_t>>> feature_sets;
|
std::vector<std::shared_ptr<HostDeviceVector<bst_feature_t>>> feature_sets;
|
||||||
|
|||||||
@@ -78,7 +78,7 @@ CPUExpandEntry QuantileHistMaker::Builder::InitRoot(
|
|||||||
|
|
||||||
{
|
{
|
||||||
GradientPairPrecise grad_stat;
|
GradientPairPrecise grad_stat;
|
||||||
if (p_fmat->IsDense()) {
|
if (p_fmat->IsDense() && !collective::IsDistributed()) {
|
||||||
/**
|
/**
|
||||||
* Specialized code for dense data: For dense data (with no missing value), the sum
|
* Specialized code for dense data: For dense data (with no missing value), the sum
|
||||||
* of gradient histogram is equal to snode[nid]
|
* of gradient histogram is equal to snode[nid]
|
||||||
|
|||||||
@@ -89,7 +89,7 @@ class TreeRefresher : public TreeUpdater {
|
|||||||
dmlc::BeginPtr(stemp[tid]) + offset);
|
dmlc::BeginPtr(stemp[tid]) + offset);
|
||||||
offset += tree->param.num_nodes;
|
offset += tree->param.num_nodes;
|
||||||
}
|
}
|
||||||
feats.Drop(inst);
|
feats.Drop();
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
// aggregate the statistics
|
// aggregate the statistics
|
||||||
|
|||||||
@@ -31,6 +31,5 @@ dependencies:
|
|||||||
- pyspark
|
- pyspark
|
||||||
- cloudpickle
|
- cloudpickle
|
||||||
- pip:
|
- pip:
|
||||||
- shap
|
|
||||||
- awscli
|
- awscli
|
||||||
- auditwheel
|
- auditwheel
|
||||||
|
|||||||
@@ -34,7 +34,6 @@ dependencies:
|
|||||||
- pyarrow
|
- pyarrow
|
||||||
- protobuf
|
- protobuf
|
||||||
- cloudpickle
|
- cloudpickle
|
||||||
- shap
|
|
||||||
- modin
|
- modin
|
||||||
# TODO: Replace it with pyspark>=3.4 once 3.4 released.
|
# TODO: Replace it with pyspark>=3.4 once 3.4 released.
|
||||||
# - https://ml-team-public-read.s3.us-west-2.amazonaws.com/pyspark-3.4.0.dev0.tar.gz
|
# - https://ml-team-public-read.s3.us-west-2.amazonaws.com/pyspark-3.4.0.dev0.tar.gz
|
||||||
|
|||||||
@@ -6,6 +6,9 @@
|
|||||||
#include <string>
|
#include <string>
|
||||||
|
|
||||||
#include "../../../src/tree/constraints.h"
|
#include "../../../src/tree/constraints.h"
|
||||||
|
#include "../../../src/tree/hist/evaluate_splits.h"
|
||||||
|
#include "../../../src/tree/hist/expand_entry.h"
|
||||||
|
#include "../helpers.h"
|
||||||
|
|
||||||
namespace xgboost {
|
namespace xgboost {
|
||||||
namespace tree {
|
namespace tree {
|
||||||
@@ -56,5 +59,38 @@ TEST(CPUFeatureInteractionConstraint, Basic) {
|
|||||||
ASSERT_FALSE(constraints.Query(1, 5));
|
ASSERT_FALSE(constraints.Query(1, 5));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST(CPUMonoConstraint, Basic) {
|
||||||
|
std::size_t kRows{64}, kCols{16};
|
||||||
|
Context ctx;
|
||||||
|
|
||||||
|
TrainParam param;
|
||||||
|
std::vector<std::int32_t> mono(kCols, 1);
|
||||||
|
I32Array arr;
|
||||||
|
for (std::size_t i = 0; i < kCols; ++i) {
|
||||||
|
arr.GetArray().push_back(mono[i]);
|
||||||
|
}
|
||||||
|
Json jarr{std::move(arr)};
|
||||||
|
std::string str_mono;
|
||||||
|
Json::Dump(jarr, &str_mono);
|
||||||
|
str_mono.front() = '(';
|
||||||
|
str_mono.back() = ')';
|
||||||
|
|
||||||
|
param.UpdateAllowUnknown(Args{{"monotone_constraints", str_mono}});
|
||||||
|
|
||||||
|
auto Xy = RandomDataGenerator{kRows, kCols, 0.0}.GenerateDMatrix(true);
|
||||||
|
auto sampler = std::make_shared<common::ColumnSampler>();
|
||||||
|
|
||||||
|
HistEvaluator<CPUExpandEntry> evalutor{param, Xy->Info(), ctx.Threads(), sampler};
|
||||||
|
evalutor.InitRoot(GradStats{2.0, 2.0});
|
||||||
|
|
||||||
|
SplitEntry split;
|
||||||
|
split.Update(1.0f, 0, 3.0, false, false, GradStats{1.0, 1.0}, GradStats{1.0, 1.0});
|
||||||
|
CPUExpandEntry entry{0, 0, split};
|
||||||
|
RegTree tree;
|
||||||
|
tree.param.UpdateAllowUnknown(Args{{"num_feature", std::to_string(kCols)}});
|
||||||
|
evalutor.ApplyTreeSplit(entry, &tree);
|
||||||
|
|
||||||
|
ASSERT_TRUE(evalutor.Evaluator().has_constraint);
|
||||||
|
}
|
||||||
} // namespace tree
|
} // namespace tree
|
||||||
} // namespace xgboost
|
} // namespace xgboost
|
||||||
|
|||||||
@@ -578,7 +578,7 @@ class TestModels:
|
|||||||
y = rng.randn(rows)
|
y = rng.randn(rows)
|
||||||
feature_names = ["test_feature_" + str(i) for i in range(cols)]
|
feature_names = ["test_feature_" + str(i) for i in range(cols)]
|
||||||
X_pd = pd.DataFrame(X, columns=feature_names)
|
X_pd = pd.DataFrame(X, columns=feature_names)
|
||||||
X_pd.iloc[:, 3] = X_pd.iloc[:, 3].astype(np.int32)
|
X_pd[f"test_feature_{3}"] = X_pd.iloc[:, 3].astype(np.int32)
|
||||||
|
|
||||||
Xy = xgb.DMatrix(X_pd, y)
|
Xy = xgb.DMatrix(X_pd, y)
|
||||||
assert Xy.feature_types[3] == "int"
|
assert Xy.feature_types[3] == "int"
|
||||||
|
|||||||
@@ -75,7 +75,10 @@ class TestPandas:
|
|||||||
np.testing.assert_array_equal(result, exp)
|
np.testing.assert_array_equal(result, exp)
|
||||||
dm = xgb.DMatrix(dummies)
|
dm = xgb.DMatrix(dummies)
|
||||||
assert dm.feature_names == ['B', 'A_X', 'A_Y', 'A_Z']
|
assert dm.feature_names == ['B', 'A_X', 'A_Y', 'A_Z']
|
||||||
assert dm.feature_types == ['int', 'int', 'int', 'int']
|
if int(pd.__version__[0]) >= 2:
|
||||||
|
assert dm.feature_types == ['int', 'i', 'i', 'i']
|
||||||
|
else:
|
||||||
|
assert dm.feature_types == ['int', 'int', 'int', 'int']
|
||||||
assert dm.num_row() == 3
|
assert dm.num_row() == 3
|
||||||
assert dm.num_col() == 4
|
assert dm.num_col() == 4
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user