merge 23Mar01
This commit is contained in:
@@ -1,56 +0,0 @@
|
||||
include README.rst
|
||||
include xgboost/LICENSE
|
||||
include xgboost/VERSION
|
||||
include xgboost/CMakeLists.txt
|
||||
|
||||
include xgboost/py.typed
|
||||
recursive-include xgboost *.py
|
||||
recursive-include xgboost/cmake *
|
||||
exclude xgboost/cmake/RPackageInstall.cmake.in
|
||||
exclude xgboost/cmake/RPackageInstallTargetSetup.cmake
|
||||
exclude xgboost/cmake/Sanitizer.cmake
|
||||
exclude xgboost/cmake/modules/FindASan.cmake
|
||||
exclude xgboost/cmake/modules/FindLSan.cmake
|
||||
exclude xgboost/cmake/modules/FindLibR.cmake
|
||||
exclude xgboost/cmake/modules/FindTSan.cmake
|
||||
exclude xgboost/cmake/modules/FindUBSan.cmake
|
||||
recursive-include xgboost/include *
|
||||
recursive-include xgboost/plugin *
|
||||
recursive-include xgboost/src *
|
||||
|
||||
recursive-include xgboost/gputreeshap/GPUTreeShap *
|
||||
|
||||
include xgboost/rabit/CMakeLists.txt
|
||||
recursive-include xgboost/rabit/include *
|
||||
recursive-include xgboost/rabit/src *
|
||||
prune xgboost/rabit/doc
|
||||
prune xgboost/rabit/guide
|
||||
|
||||
include xgboost/dmlc-core/CMakeLists.txt
|
||||
|
||||
recursive-include xgboost/dmlc-core/cmake *
|
||||
exclude xgboost/dmlc-core/cmake/gtest_cmake.in
|
||||
exclude xgboost/dmlc-core/cmake/lint.cmake
|
||||
exclude xgboost/dmlc-core/cmake/Sanitizer.cmake
|
||||
exclude xgboost/dmlc-core/cmake/Modules/FindASan.cmake
|
||||
exclude xgboost/dmlc-core/cmake/Modules/FindLSan.cmake
|
||||
exclude xgboost/dmlc-core/cmake/Modules/FindTSan.cmake
|
||||
exclude xgboost/dmlc-core/cmake/Modules/FindUBSan.cmake
|
||||
|
||||
recursive-include xgboost/dmlc-core/include *
|
||||
recursive-include xgboost/dmlc-core/include *
|
||||
recursive-include xgboost/dmlc-core/make *
|
||||
recursive-include xgboost/dmlc-core/src *
|
||||
include xgboost/dmlc-core/tracker/dmlc-submit
|
||||
recursive-include xgboost/dmlc-core/tracker/dmlc_tracker *.py
|
||||
include xgboost/dmlc-core/tracker/yarn/build.bat
|
||||
include xgboost/dmlc-core/tracker/yarn/build.sh
|
||||
include xgboost/dmlc-core/tracker/yarn/pom.xml
|
||||
recursive-include xgboost/dmlc-core/tracker/yarn/src *
|
||||
include xgboost/dmlc-core/windows/dmlc.sln
|
||||
include xgboost/dmlc-core/windows/dmlc/dmlc.vcxproj
|
||||
|
||||
prune xgboost/dmlc-core/doc
|
||||
prune xgboost/dmlc-core/scripts/
|
||||
|
||||
global-exclude *.py[oc]
|
||||
22
python-package/hatch_build.py
Normal file
22
python-package/hatch_build.py
Normal file
@@ -0,0 +1,22 @@
|
||||
"""
|
||||
Custom hook to customize the behavior of Hatchling.
|
||||
Here, we customize the tag of the generated wheels.
|
||||
"""
|
||||
import sysconfig
|
||||
from typing import Any, Dict
|
||||
|
||||
from hatchling.builders.hooks.plugin.interface import BuildHookInterface
|
||||
|
||||
|
||||
def get_tag() -> str:
    """Return the wheel tag for the platform this interpreter runs on.

    The tag has the form ``py3-none-<platform>``, where ``<platform>`` is the
    value of ``sysconfig.get_platform()`` with every ``-`` and ``.`` replaced
    by ``_``.
    """
    # One translation pass maps both separator characters to underscores.
    separators_to_underscore = str.maketrans("-.", "__")
    normalized_platform = sysconfig.get_platform().translate(separators_to_underscore)
    return "py3-none-" + normalized_platform
|
||||
|
||||
|
||||
class CustomBuildHook(BuildHookInterface):
    """Hatchling build hook that sets the tag of the generated wheel."""

    def initialize(self, version: str, build_data: Dict[str, Any]) -> None:
        """Run immediately before each build and record the wheel tag.

        The value returned by ``get_tag()`` is stored under
        ``build_data["tag"]``; ``version`` is not used.
        """
        wheel_tag = get_tag()
        build_data["tag"] = wheel_tag
|
||||
0
python-package/packager/__init__.py
Normal file
0
python-package/packager/__init__.py
Normal file
56
python-package/packager/build_config.py
Normal file
56
python-package/packager/build_config.py
Normal file
@@ -0,0 +1,56 @@
|
||||
"""Build configuration"""
|
||||
import dataclasses
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
|
||||
@dataclasses.dataclass
class BuildConfiguration:  # pylint: disable=R0902
    """Configurations used when building libxgboost.

    Every field is a boolean switch. Fields can be overridden from the
    ``config_settings`` mapping handed over by a PEP 517 frontend (see
    ``update``) and are translated to CMake ``-D<OPTION>=ON|OFF`` arguments
    by ``get_cmake_args``.
    """

    # Whether to hide C++ symbols in libxgboost.so
    hide_cxx_symbols: bool = True
    # Whether to enable OpenMP
    use_openmp: bool = True
    # Whether to enable CUDA
    use_cuda: bool = False
    # Whether to enable NCCL
    use_nccl: bool = False
    # Whether to enable HDFS
    use_hdfs: bool = False
    # Whether to enable Azure Storage
    use_azure: bool = False
    # Whether to enable AWS S3
    use_s3: bool = False
    # Whether to enable the dense parser plugin
    plugin_dense_parser: bool = False
    # Special option: never forwarded to CMake (see get_cmake_args)
    use_system_libxgboost: bool = False

    def _set_config_setting(
        self, config_settings: Dict[str, Any], field_name: str
    ) -> None:
        """Set one field from ``config_settings``.

        The value is interpreted as true when its lower-cased string form is
        one of ``"true"``, ``"1"`` or ``"on"``; anything else means false.

        Raises
        ------
        ValueError
            If ``field_name`` is not a field of this dataclass, or is not
            present in ``config_settings``.
        """
        known_fields = {x.name for x in dataclasses.fields(self)}
        if field_name not in known_fields or field_name not in config_settings:
            raise ValueError(f"Field {field_name} is not a valid config_settings")
        # str() lets non-string values (e.g. a real bool) be handled as well.
        raw_value = str(config_settings[field_name])
        setattr(self, field_name, raw_value.lower() in ["true", "1", "on"])

    def update(self, config_settings: Optional[Dict[str, Any]]) -> None:
        """Parse config_settings from Pip (or other PEP 517 frontend)

        Only the keys actually present in ``config_settings`` are applied;
        fields that are not mentioned keep their current value. ``None`` is a
        no-op.

        Raises
        ------
        ValueError
            If ``config_settings`` contains a key that is not a known field.
        """
        if config_settings is None:
            return
        # Iterate over the supplied keys, not over every dataclass field:
        # requiring every field to be present would reject any partial
        # configuration such as {"use_cuda": "true"}.
        for field_name in config_settings:
            self._set_config_setting(config_settings, field_name)

    def get_cmake_args(self) -> List[str]:
        """Convert build configuration to CMake args

        Returns one ``-D<FIELD_NAME_UPPERCASED>=ON|OFF`` entry per field, in
        declaration order, skipping ``use_system_libxgboost``.
        """
        cmake_args = []
        for field_name in [x.name for x in dataclasses.fields(self)]:
            if field_name in ["use_system_libxgboost"]:
                continue
            cmake_option = field_name.upper()
            cmake_value = "ON" if getattr(self, field_name) is True else "OFF"
            cmake_args.append(f"-D{cmake_option}={cmake_value}")
        return cmake_args
|
||||
157
python-package/packager/nativelib.py
Normal file
157
python-package/packager/nativelib.py
Normal file
@@ -0,0 +1,157 @@
|
||||
"""
|
||||
Functions for building libxgboost
|
||||
"""
|
||||
import logging
|
||||
import os
|
||||
import pathlib
|
||||
import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
from platform import system
|
||||
from typing import Optional
|
||||
|
||||
from .build_config import BuildConfiguration
|
||||
|
||||
|
||||
def _lib_name() -> str:
    """Return the file name of the XGBoost shared library on this platform.

    ``platform.system()`` decides the name: ``xgboost.dll`` on Windows,
    ``libxgboost.dylib`` on Darwin and ``libxgboost.so`` on Linux, OS400 and
    any system whose upper-cased name ends with ``BSD``.

    Raises
    ------
    NotImplementedError
        If the platform is none of the above.
    """
    platform_name = system()
    if platform_name == "Windows":
        return "xgboost.dll"
    if platform_name == "Darwin":
        return "libxgboost.dylib"
    if platform_name in ("Linux", "OS400") or platform_name.upper().endswith("BSD"):
        return "libxgboost.so"
    raise NotImplementedError(f"System {platform_name} not supported")
|
||||
|
||||
|
||||
def build_libxgboost(
    cpp_src_dir: pathlib.Path,
    build_dir: pathlib.Path,
    build_config: BuildConfiguration,
) -> pathlib.Path:
    """Build libxgboost in a temporary directory and obtain the path to built libxgboost

    Parameters
    ----------
    cpp_src_dir :
        Directory holding the C++ sources; passed to CMake as the source dir.
    build_dir :
        Existing directory in which CMake and the build tool are run.
    build_config :
        Supplies the CMake ``-D`` options. On non-Windows systems its
        ``use_openmp`` field is set to ``False`` if the first build fails.

    Returns
    -------
    ``build_dir / "lib" / <platform specific library name>``

    Raises
    ------
    RuntimeError
        If ``cpp_src_dir`` is not a directory, or (Windows) none of the
        supported generators produced a successful build.
    subprocess.CalledProcessError
        If (non-Windows) the retry without OpenMP fails as well.
    """
    logger = logging.getLogger("xgboost.packager.build_libxgboost")

    if not cpp_src_dir.is_dir():
        raise RuntimeError(f"Expected {cpp_src_dir} to be a directory")
    logger.info(
        "Building %s from the C++ source files in %s...", _lib_name(), str(cpp_src_dir)
    )

    def _build(*, generator: str) -> None:
        """Configure with the given CMake generator flag, then compile."""
        cmake_cmd = [
            "cmake",
            str(cpp_src_dir),
            generator,
            "-DKEEP_BUILD_ARTIFACTS_IN_BINARY_DIR=ON",
        ]
        cmake_cmd.extend(build_config.get_cmake_args())

        # Flag for cross-compiling for Apple Silicon
        # We use environment variable because it's the only way to pass down custom flags
        # through the cibuildwheel package, which calls `pip wheel` command.
        if "CIBW_TARGET_OSX_ARM64" in os.environ:
            cmake_cmd.append("-DCMAKE_OSX_ARCHITECTURES=arm64")

        logger.info("CMake args: %s", str(cmake_cmd))
        subprocess.check_call(cmake_cmd, cwd=build_dir)

        if system() == "Windows":
            subprocess.check_call(
                ["cmake", "--build", ".", "--config", "Release"], cwd=build_dir
            )
        else:
            # os.cpu_count() returns None when the count cannot be determined;
            # fall back to a single job instead of passing "-jNone".
            nproc = os.cpu_count() or 1
            # build_tool is only assigned on the non-Windows path below.
            assert build_tool is not None
            subprocess.check_call([build_tool, f"-j{nproc}"], cwd=build_dir)

    if system() == "Windows":
        supported_generators = (
            "-GVisual Studio 17 2022",
            "-GVisual Studio 16 2019",
            "-GVisual Studio 15 2017",
            "-GMinGW Makefiles",
        )
        # Try each generator in order; the first successful build wins.
        for generator in supported_generators:
            try:
                _build(generator=generator)
                logger.info(
                    "Successfully built %s using generator %s", _lib_name(), generator
                )
                break
            except subprocess.CalledProcessError as e:
                logger.info(
                    "Tried building with generator %s but failed with exception %s",
                    generator,
                    str(e),
                )
                # Empty build directory
                shutil.rmtree(build_dir)
                build_dir.mkdir()
        else:
            # Reached only when the loop finished without a `break`.
            raise RuntimeError(
                "None of the supported generators produced a successful build! "
                f"Supported generators: {supported_generators}"
            )
    else:
        build_tool = "ninja" if shutil.which("ninja") else "make"
        generator = "-GNinja" if build_tool == "ninja" else "-GUnix Makefiles"
        try:
            _build(generator=generator)
        except subprocess.CalledProcessError as e:
            # Retry once with OpenMP switched off.
            logger.info("Failed to build with OpenMP. Exception: %s", str(e))
            build_config.use_openmp = False
            _build(generator=generator)

    return build_dir / "lib" / _lib_name()
|
||||
|
||||
|
||||
def locate_local_libxgboost(
    toplevel_dir: pathlib.Path,
    logger: logging.Logger,
) -> Optional[pathlib.Path]:
    """
    Look for libxgboost in the ``lib/`` directory next to ``toplevel_dir``.

    The candidate path is ``toplevel_dir.parent / "lib" / <library name>``.
    Returns that path (after logging where it was found) if it exists, and
    ``None`` otherwise.
    """
    candidate = toplevel_dir.parent / "lib" / _lib_name()
    if not candidate.exists():
        return None
    logger.info("Found %s at %s", candidate.name, str(candidate.parent))
    return candidate
|
||||
|
||||
|
||||
def locate_or_build_libxgboost(
    toplevel_dir: pathlib.Path,
    build_dir: pathlib.Path,
    build_config: BuildConfiguration,
) -> pathlib.Path:
    """Return a path to libxgboost, building it from source as a last resort.

    Lookup order:

    1. A locally built library found by ``locate_local_libxgboost``.
    2. If ``build_config.use_system_libxgboost`` is set, the library under
       ``<sys.prefix>/lib``; a ``RuntimeError`` is raised when it is missing.
    3. A fresh build via ``build_libxgboost``, using ``toplevel_dir/cpp_src``
       when that directory exists and ``toplevel_dir.parent`` otherwise.
    """
    logger = logging.getLogger("xgboost.packager.locate_or_build_libxgboost")

    local_lib = locate_local_libxgboost(toplevel_dir, logger=logger)
    if local_lib is not None:
        return local_lib

    if build_config.use_system_libxgboost:
        # Find libxgboost from system prefix
        prefix_dir = pathlib.Path(sys.prefix).absolute().resolve()
        system_lib = prefix_dir / "lib" / _lib_name()
        if not system_lib.exists():
            raise RuntimeError(
                f"use_system_libxgboost was specified but {_lib_name()} is "
                f"not found in {system_lib.parent}"
            )

        logger.info("Using system XGBoost: %s", str(system_lib))
        return system_lib

    bundled_src = toplevel_dir.joinpath("cpp_src")
    if bundled_src.exists():
        # Source distribution; all C++ source files to be found in cpp_src/
        cpp_src_dir = toplevel_dir.joinpath("cpp_src")
    else:
        # Probably running "pip install ." from python-package/
        cpp_src_dir = toplevel_dir.parent
        cmake_lists = cpp_src_dir.joinpath("CMakeLists.txt")
        if not cmake_lists.exists():
            raise RuntimeError(f"Did not find CMakeLists.txt from {cpp_src_dir}")
    return build_libxgboost(cpp_src_dir, build_dir=build_dir, build_config=build_config)
|
||||
157
python-package/packager/pep517.py
Normal file
157
python-package/packager/pep517.py
Normal file
@@ -0,0 +1,157 @@
|
||||
"""
|
||||
Custom build backend for XGBoost Python package.
|
||||
Builds source distribution and binary wheels, following PEP 517 / PEP 660.
|
||||
Reuses components of Hatchling (https://github.com/pypa/hatch/tree/master/backend) for the sake
|
||||
of brevity.
|
||||
"""
|
||||
import dataclasses
|
||||
import logging
|
||||
import os
|
||||
import pathlib
|
||||
import tempfile
|
||||
from contextlib import contextmanager
|
||||
from typing import Any, Dict, Iterator, Optional, Union
|
||||
|
||||
import hatchling.build
|
||||
|
||||
from .build_config import BuildConfiguration
|
||||
from .nativelib import locate_local_libxgboost, locate_or_build_libxgboost
|
||||
from .sdist import copy_cpp_src_tree
|
||||
from .util import copy_with_logging, copytree_with_logging
|
||||
|
||||
|
||||
@contextmanager
def cd(path: Union[str, pathlib.Path]) -> Iterator[str]:  # pylint: disable=C0103
    """
    Context manager that switches the working directory for its body.

    Yields the real (symlink-resolved) form of ``path`` as a string and
    restores the previous working directory on exit, even if the body raises.
    TODO(hcho3): Remove this once we adopt Python 3.11, which implements contextlib.chdir.
    """
    target = os.path.realpath(str(path))
    previous = os.getcwd()
    os.chdir(target)
    try:
        yield target
    finally:
        os.chdir(previous)
|
||||
|
||||
|
||||
TOPLEVEL_DIR = pathlib.Path(__file__).parent.parent.absolute().resolve()
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
|
||||
|
||||
# Aliases
|
||||
get_requires_for_build_sdist = hatchling.build.get_requires_for_build_sdist
|
||||
get_requires_for_build_wheel = hatchling.build.get_requires_for_build_wheel
|
||||
get_requires_for_build_editable = hatchling.build.get_requires_for_build_editable
|
||||
|
||||
|
||||
def build_wheel(
    wheel_directory: str,
    config_settings: Optional[Dict[str, Any]] = None,
    metadata_directory: Optional[str] = None,
) -> str:
    """Build a wheel

    The Python package and libxgboost (located or freshly built) are staged
    in a temporary workspace, and Hatchling is then run from inside that
    workspace to produce the wheel.

    Parameters
    ----------
    wheel_directory :
        Directory in which the wheel is to be placed.
    config_settings :
        Optional settings from the PEP 517 frontend; parsed into a
        ``BuildConfiguration`` and also forwarded to Hatchling.
    metadata_directory :
        Optional metadata directory, forwarded to Hatchling.

    Returns
    -------
    The value returned by ``hatchling.build.build_wheel``.
    """
    logger = logging.getLogger("xgboost.packager.build_wheel")

    # Hatchling is invoked after changing into a temporary workspace, so a
    # relative path would resolve inside a directory that is deleted when
    # this function returns. Pin the paths to the caller's working directory.
    wheel_directory = os.path.abspath(wheel_directory)
    if metadata_directory is not None:
        metadata_directory = os.path.abspath(metadata_directory)

    build_config = BuildConfiguration()
    build_config.update(config_settings)
    logger.info("Parsed build configuration: %s", dataclasses.asdict(build_config))

    # Create tempdir with Python package + libxgboost
    with tempfile.TemporaryDirectory() as td:
        td_path = pathlib.Path(td)
        build_dir = td_path / "libbuild"
        build_dir.mkdir()

        workspace = td_path / "whl_workspace"
        workspace.mkdir()
        logger.info("Copying project files to temporary directory %s", str(workspace))

        copy_with_logging(TOPLEVEL_DIR / "pyproject.toml", workspace, logger=logger)
        copy_with_logging(TOPLEVEL_DIR / "hatch_build.py", workspace, logger=logger)
        copy_with_logging(TOPLEVEL_DIR / "README.rst", workspace, logger=logger)

        pkg_path = workspace / "xgboost"
        copytree_with_logging(TOPLEVEL_DIR / "xgboost", pkg_path, logger=logger)
        # The native library is shipped inside the package under xgboost/lib/.
        lib_path = pkg_path / "lib"
        lib_path.mkdir()
        libxgboost = locate_or_build_libxgboost(
            TOPLEVEL_DIR, build_dir=build_dir, build_config=build_config
        )
        copy_with_logging(libxgboost, lib_path, logger=logger)

        with cd(workspace):
            wheel_name = hatchling.build.build_wheel(
                wheel_directory, config_settings, metadata_directory
            )
    return wheel_name
|
||||
|
||||
|
||||
def build_sdist(
    sdist_directory: str,
    config_settings: Optional[Dict[str, Any]] = None,
) -> str:
    """Build a source distribution

    The Python package, the ``packager`` backend and a copy of the C++ source
    tree (placed under ``cpp_src/``) are staged in a temporary workspace, and
    Hatchling is then run from inside that workspace to produce the sdist.

    Parameters
    ----------
    sdist_directory :
        Directory in which the sdist is to be placed.
    config_settings :
        Must be empty or ``None``; config settings are not supported here.

    Returns
    -------
    The value returned by ``hatchling.build.build_sdist``.

    Raises
    ------
    NotImplementedError
        If ``config_settings`` is non-empty.
    RuntimeError
        If ``CMakeLists.txt`` is not found in the parent of the package dir.
    """
    logger = logging.getLogger("xgboost.packager.build_sdist")

    if config_settings:
        raise NotImplementedError(
            "XGBoost's custom build backend doesn't support config_settings option "
            f"when building sdist. {config_settings=}"
        )

    # Hatchling is invoked after changing into a temporary workspace, so a
    # relative path would resolve inside a directory that is deleted when
    # this function returns. Pin the path to the caller's working directory.
    sdist_directory = os.path.abspath(sdist_directory)

    cpp_src_dir = TOPLEVEL_DIR.parent
    if not cpp_src_dir.joinpath("CMakeLists.txt").exists():
        raise RuntimeError(f"Did not find CMakeLists.txt from {cpp_src_dir}")

    # Create tempdir with Python package + C++ sources
    with tempfile.TemporaryDirectory() as td:
        td_path = pathlib.Path(td)

        workspace = td_path / "sdist_workspace"
        workspace.mkdir()
        logger.info("Copying project files to temporary directory %s", str(workspace))

        copy_with_logging(TOPLEVEL_DIR / "pyproject.toml", workspace, logger=logger)
        copy_with_logging(TOPLEVEL_DIR / "hatch_build.py", workspace, logger=logger)
        copy_with_logging(TOPLEVEL_DIR / "README.rst", workspace, logger=logger)

        copytree_with_logging(
            TOPLEVEL_DIR / "xgboost", workspace / "xgboost", logger=logger
        )
        copytree_with_logging(
            TOPLEVEL_DIR / "packager", workspace / "packager", logger=logger
        )

        temp_cpp_src_dir = workspace / "cpp_src"
        copy_cpp_src_tree(cpp_src_dir, target_dir=temp_cpp_src_dir, logger=logger)

        with cd(workspace):
            sdist_name = hatchling.build.build_sdist(sdist_directory, config_settings)
    return sdist_name
|
||||
|
||||
|
||||
def build_editable(
    wheel_directory: str,
    config_settings: Optional[Dict[str, Any]] = None,
    metadata_directory: Optional[str] = None,
) -> str:
    """Build an editable installation by delegating to Hatchling.

    Two preconditions are checked first: ``config_settings`` must be empty
    (otherwise ``NotImplementedError``), and a locally built libxgboost must
    already exist (otherwise ``RuntimeError``).
    """
    logger = logging.getLogger("xgboost.packager.build_editable")

    if config_settings:
        raise NotImplementedError(
            "XGBoost's custom build backend doesn't support config_settings option "
            f"when building editable installation. {config_settings=}"
        )

    # An editable install does not build the native library itself.
    prebuilt_lib = locate_local_libxgboost(TOPLEVEL_DIR, logger=logger)
    if prebuilt_lib is None:
        raise RuntimeError(
            "To use the editable installation, first build libxgboost with CMake. "
            "See https://xgboost.readthedocs.io/en/latest/build.html for detailed instructions."
        )

    editable_name = hatchling.build.build_editable(
        wheel_directory, config_settings, metadata_directory
    )
    return editable_name
|
||||
27
python-package/packager/sdist.py
Normal file
27
python-package/packager/sdist.py
Normal file
@@ -0,0 +1,27 @@
|
||||
"""
|
||||
Functions for building sdist
|
||||
"""
|
||||
import logging
|
||||
import pathlib
|
||||
|
||||
from .util import copy_with_logging, copytree_with_logging
|
||||
|
||||
|
||||
def copy_cpp_src_tree(
    cpp_src_dir: pathlib.Path, target_dir: pathlib.Path, logger: logging.Logger
) -> None:
    """Copy the parts of the C++ source tree needed for a build.

    A fixed set of sub-directories is copied recursively from ``cpp_src_dir``
    to the same relative location under ``target_dir``, followed by the
    top-level ``CMakeLists.txt`` and ``LICENSE`` files.
    """
    source_subdirs = (
        "src",
        "include",
        "dmlc-core",
        "gputreeshap",
        "rabit",
        "cmake",
        "plugin",
    )
    toplevel_files = ("CMakeLists.txt", "LICENSE")

    for dirname in source_subdirs:
        copytree_with_logging(
            cpp_src_dir / dirname, target_dir / dirname, logger=logger
        )

    for filename in toplevel_files:
        copy_with_logging(cpp_src_dir.joinpath(filename), target_dir, logger=logger)
|
||||
25
python-package/packager/util.py
Normal file
25
python-package/packager/util.py
Normal file
@@ -0,0 +1,25 @@
|
||||
"""
|
||||
Utility functions for implementing PEP 517 backend
|
||||
"""
|
||||
import logging
|
||||
import pathlib
|
||||
import shutil
|
||||
|
||||
|
||||
def copytree_with_logging(
    src: pathlib.Path, dest: pathlib.Path, logger: logging.Logger
) -> None:
    """Log the source and destination, then call shutil.copytree()"""
    src_text, dest_text = str(src), str(dest)
    logger.info("Copying %s -> %s", src_text, dest_text)
    shutil.copytree(src, dest)
|
||||
|
||||
|
||||
def copy_with_logging(
    src: pathlib.Path, dest: pathlib.Path, logger: logging.Logger
) -> None:
    """Log the source and resulting file path, then call shutil.copy()

    When ``dest`` is an existing directory the logged destination is
    ``dest / src.name``; otherwise it is ``dest`` itself.
    """
    logged_dest = dest / src.name if dest.is_dir() else dest
    logger.info("Copying %s -> %s", str(src), str(logged_dest))
    shutil.copy(src, dest)
|
||||
42
python-package/pyproject.toml
Normal file
42
python-package/pyproject.toml
Normal file
@@ -0,0 +1,42 @@
|
||||
[build-system]
|
||||
requires = [
|
||||
"hatchling>=1.12.1"
|
||||
]
|
||||
backend-path = ["."]
|
||||
build-backend = "packager.pep517"
|
||||
|
||||
[project]
|
||||
name = "xgboost"
|
||||
version = "2.0.0-dev"
|
||||
authors = [
|
||||
{name = "Hyunsu Cho", email = "chohyu01@cs.washington.edu"},
|
||||
{name = "Jiaming Yuan", email = "jm.yuan@outlook.com"}
|
||||
]
|
||||
description = "XGBoost Python Package"
|
||||
readme = {file = "README.rst", content-type = "text/x-rst"}
|
||||
requires-python = ">=3.8"
|
||||
license = {text = "Apache-2.0"}
|
||||
classifiers = [
|
||||
"License :: OSI Approved :: Apache Software License",
|
||||
"Development Status :: 5 - Production/Stable",
|
||||
"Operating System :: OS Independent",
|
||||
"Programming Language :: Python",
|
||||
"Programming Language :: Python :: 3",
|
||||
"Programming Language :: Python :: 3.8",
|
||||
"Programming Language :: Python :: 3.9",
|
||||
"Programming Language :: Python :: 3.10"
|
||||
]
|
||||
dependencies = [
|
||||
"numpy",
|
||||
"scipy"
|
||||
]
|
||||
|
||||
[project.optional-dependencies]
|
||||
pandas = ["pandas"]
|
||||
scikit-learn = ["scikit-learn"]
|
||||
dask = ["dask", "pandas", "distributed"]
|
||||
datatable = ["datatable"]
|
||||
plotting = ["graphviz", "matplotlib"]
|
||||
pyspark = ["pyspark", "scikit-learn", "cloudpickle"]
|
||||
|
||||
[tool.hatch.build.targets.wheel.hooks.custom]
|
||||
@@ -16,7 +16,7 @@ def config_doc(
|
||||
extra_note: Optional[str] = None,
|
||||
parameters: Optional[str] = None,
|
||||
returns: Optional[str] = None,
|
||||
see_also: Optional[str] = None
|
||||
see_also: Optional[str] = None,
|
||||
) -> Callable[[_F], _F]:
|
||||
"""Decorator to format docstring for config functions.
|
||||
|
||||
|
||||
@@ -73,6 +73,7 @@ from .core import (
|
||||
_deprecate_positional_args,
|
||||
_expect,
|
||||
)
|
||||
from .data import _is_cudf_ser, _is_cupy_array
|
||||
from .sklearn import (
|
||||
XGBClassifier,
|
||||
XGBClassifierBase,
|
||||
@@ -1894,10 +1895,15 @@ class DaskXGBClassifier(DaskScikitLearnBase, XGBClassifierMixIn, XGBClassifierBa
|
||||
)
|
||||
|
||||
# pylint: disable=attribute-defined-outside-init
|
||||
if isinstance(y, (da.Array)):
|
||||
if isinstance(y, da.Array):
|
||||
self.classes_ = await self.client.compute(da.unique(y))
|
||||
else:
|
||||
self.classes_ = await self.client.compute(y.drop_duplicates())
|
||||
if _is_cudf_ser(self.classes_):
|
||||
self.classes_ = self.classes_.to_cupy()
|
||||
if _is_cupy_array(self.classes_):
|
||||
self.classes_ = self.classes_.get()
|
||||
self.classes_ = numpy.array(self.classes_)
|
||||
self.n_classes_ = len(self.classes_)
|
||||
|
||||
if self.n_classes_ > 2:
|
||||
|
||||
@@ -30,7 +30,7 @@ def plot_importance(
|
||||
grid: bool = True,
|
||||
show_values: bool = True,
|
||||
values_format: str = "{v}",
|
||||
**kwargs: Any
|
||||
**kwargs: Any,
|
||||
) -> Axes:
|
||||
"""Plot importance based on fitted trees.
|
||||
|
||||
@@ -155,7 +155,7 @@ def to_graphviz(
|
||||
no_color: Optional[str] = None,
|
||||
condition_node_params: Optional[dict] = None,
|
||||
leaf_node_params: Optional[dict] = None,
|
||||
**kwargs: Any
|
||||
**kwargs: Any,
|
||||
) -> GraphvizSource:
|
||||
"""Convert specified tree to graphviz instance. IPython can automatically plot
|
||||
the returned graphviz instance. Otherwise, you should call .render() method
|
||||
@@ -250,7 +250,7 @@ def plot_tree(
|
||||
num_trees: int = 0,
|
||||
rankdir: Optional[str] = None,
|
||||
ax: Optional[Axes] = None,
|
||||
**kwargs: Any
|
||||
**kwargs: Any,
|
||||
) -> Axes:
|
||||
"""Plot specified tree.
|
||||
|
||||
|
||||
@@ -219,7 +219,9 @@ def create_dmatrix_from_partitions( # pylint: disable=too-many-arguments
|
||||
array: Optional[np.ndarray] = part[feature_cols]
|
||||
elif part[name].shape[0] > 0:
|
||||
array = part[name]
|
||||
array = stack_series(array)
|
||||
if name == alias.data:
|
||||
# For the array/vector typed case.
|
||||
array = stack_series(array)
|
||||
else:
|
||||
array = None
|
||||
|
||||
|
||||
@@ -1,4 +1,6 @@
|
||||
"""Xgboost pyspark integration submodule for params."""
|
||||
from typing import Dict
|
||||
|
||||
# pylint: disable=too-few-public-methods
|
||||
from pyspark.ml.param import TypeConverters
|
||||
from pyspark.ml.param.shared import Param, Params
|
||||
@@ -11,7 +13,7 @@ class HasArbitraryParamsDict(Params):
|
||||
input.
|
||||
"""
|
||||
|
||||
arbitrary_params_dict: Param[dict] = Param(
|
||||
arbitrary_params_dict: "Param[Dict]" = Param(
|
||||
Params._dummy(),
|
||||
"arbitrary_params_dict",
|
||||
"arbitrary_params_dict This parameter holds all of the additional parameters which are "
|
||||
|
||||
@@ -317,13 +317,15 @@ class TestDataset:
|
||||
enable_categorical=True,
|
||||
)
|
||||
|
||||
def get_device_dmat(self) -> xgb.QuantileDMatrix:
|
||||
def get_device_dmat(self, max_bin: Optional[int]) -> xgb.QuantileDMatrix:
|
||||
import cupy as cp
|
||||
|
||||
w = None if self.w is None else cp.array(self.w)
|
||||
X = cp.array(self.X, dtype=np.float32)
|
||||
y = cp.array(self.y, dtype=np.float32)
|
||||
return xgb.QuantileDMatrix(X, y, weight=w, base_margin=self.margin)
|
||||
return xgb.QuantileDMatrix(
|
||||
X, y, weight=w, base_margin=self.margin, max_bin=max_bin
|
||||
)
|
||||
|
||||
def get_external_dmat(self) -> xgb.DMatrix:
|
||||
n_samples = self.X.shape[0]
|
||||
@@ -431,8 +433,11 @@ def make_ltr(
|
||||
"""Make a dataset for testing LTR."""
|
||||
rng = np.random.default_rng(1994)
|
||||
X = rng.normal(0, 1.0, size=n_samples * n_features).reshape(n_samples, n_features)
|
||||
y = rng.integers(0, max_rel, size=n_samples)
|
||||
qid = rng.integers(0, n_query_groups, size=n_samples)
|
||||
y = np.sum(X, axis=1)
|
||||
y -= y.min()
|
||||
y = np.round(y / y.max() * max_rel).astype(np.int32)
|
||||
|
||||
qid = rng.integers(0, n_query_groups, size=n_samples, dtype=np.int32)
|
||||
w = rng.normal(0, 1.0, size=n_query_groups)
|
||||
w -= np.min(w)
|
||||
w /= np.max(w)
|
||||
@@ -879,5 +884,12 @@ def data_dir(path: str) -> str:
|
||||
return os.path.join(demo_dir(path), "data")
|
||||
|
||||
|
||||
def load_agaricus(path: str) -> Tuple[xgb.DMatrix, xgb.DMatrix]:
    """Load the agaricus train and test sets as a ``(dtrain, dtest)`` pair.

    Both files are read from the directory returned by ``data_dir(path)``,
    with ``?format=libsvm`` appended to the file names.
    """
    data_root = data_dir(path)
    train_uri = os.path.join(data_root, "agaricus.txt.train?format=libsvm")
    test_uri = os.path.join(data_root, "agaricus.txt.test?format=libsvm")
    return xgb.DMatrix(train_uri), xgb.DMatrix(test_uri)
|
||||
|
||||
|
||||
def project_root(path: str) -> str:
    """Return the normalized parent directory of ``demo_dir(path)``."""
    parent_of_demo = os.path.join(demo_dir(path), os.path.pardir)
    return normpath(parent_of_demo)
|
||||
|
||||
Reference in New Issue
Block a user