[EM] Make page concatenation optional. (#10826)

This PR introduces a new parameter, `extmem_concat_pages`, to make page concatenation optional for GPU hist. In addition, the documentation is updated for the new GPU-based external memory.
This commit is contained in:
Jiaming Yuan
2024-09-24 06:19:28 +08:00
committed by GitHub
parent 215da76263
commit e228c1a121
31 changed files with 690 additions and 388 deletions

View File

@@ -504,8 +504,8 @@ def _prediction_output(
class DataIter(ABC): # pylint: disable=too-many-instance-attributes
"""The interface for user defined data iterator. The iterator facilitates
distributed training, :py:class:`QuantileDMatrix`, and external memory support using
:py:class:`DMatrix`. Most of the time, users don't need to interact with this class
directly.
:py:class:`DMatrix` or :py:class:`ExtMemQuantileDMatrix`. Most of the time, users don't
need to interact with this class directly.
.. note::
@@ -525,15 +525,16 @@ class DataIter(ABC): # pylint: disable=too-many-instance-attributes
keep the cache.
on_host :
Whether the data should be cached on host memory instead of harddrive when using
GPU with external memory. If set to true, then the "external memory" would
simply be CPU (host) memory.
Whether the data should be cached on the host memory instead of the file system
when using GPU with external memory. When set to true (the default), the
"external memory" is the CPU (host) memory. See
:doc:`/tutorials/external_memory` for more info.
.. versionadded:: 3.0.0
.. warning::
This is still a work in progress and is not ready for testing yet.
This is an experimental parameter.
"""
@@ -541,7 +542,7 @@ class DataIter(ABC): # pylint: disable=too-many-instance-attributes
self,
cache_prefix: Optional[str] = None,
release_data: bool = True,
on_host: bool = False,
on_host: bool = True,
) -> None:
self.cache_prefix = cache_prefix
self.on_host = on_host
@@ -1681,9 +1682,12 @@ class QuantileDMatrix(DMatrix):
class ExtMemQuantileDMatrix(DMatrix):
"""The external memory version of the :py:class:`QuantileDMatrix`.
See :doc:`/tutorials/external_memory` for an explanation and usage examples, and
:py:class:`QuantileDMatrix` for parameter documentation.
.. warning::
This is still a work in progress and is not ready for testing yet.
This is an experimental feature.
.. versionadded:: 3.0.0
@@ -1699,6 +1703,13 @@ class ExtMemQuantileDMatrix(DMatrix):
ref: Optional[DMatrix] = None,
enable_categorical: bool = False,
) -> None:
"""
Parameters
----------
data :
A user-defined :py:class:`DataIter` for loading data.
"""
self.max_bin = max_bin
self.missing = missing if missing is not None else np.nan
self.nthread = nthread if nthread is not None else -1