[EM] Allow staging ellpack on host for GPU external memory. (#10488)

- New parameter `on_host`.
- Abstract format creation and stream creation into policy classes.
This commit is contained in:
Jiaming Yuan
2024-06-28 04:42:18 +08:00
committed by GitHub
parent 824fba783e
commit e8a962575a
36 changed files with 842 additions and 317 deletions

View File

@@ -503,18 +503,29 @@ class DataIter(ABC): # pylint: disable=too-many-instance-attributes
----------
cache_prefix :
Prefix to the cache files, only used in external memory.
release_data :
Whether the iterator should release the data during iteration. Set it to True if
the data transformation (converting data to np.float32 type) is memory
intensive. Otherwise, if the transformation is computation intensive then we can
keep the cache.
on_host :
Whether the data should be cached in host memory instead of on the hard drive when
using GPU with external memory. If set to true, then the "external memory" would
simply be CPU (host) memory. This is still a work in progress and is not yet
ready for testing.
"""
def __init__(
self, cache_prefix: Optional[str] = None, release_data: bool = True
self,
cache_prefix: Optional[str] = None,
release_data: bool = True,
on_host: bool = False,
) -> None:
self.cache_prefix = cache_prefix
self.on_host = on_host
self._handle = _ProxyDMatrix()
self._exception: Optional[Exception] = None
@@ -905,12 +916,12 @@ class DMatrix: # pylint: disable=too-many-instance-attributes,too-many-public-m
def _init_from_iter(self, iterator: DataIter, enable_categorical: bool) -> None:
it = iterator
args = {
"missing": self.missing,
"nthread": self.nthread,
"cache_prefix": it.cache_prefix if it.cache_prefix else "",
}
args_cstr = from_pystr_to_cstr(json.dumps(args))
args = make_jcargs(
missing=self.missing,
nthread=self.nthread,
cache_prefix=it.cache_prefix if it.cache_prefix else "",
on_host=it.on_host,
)
handle = ctypes.c_void_p()
reset_callback, next_callback = it.get_callbacks(enable_categorical)
ret = _LIB.XGDMatrixCreateFromCallback(
@@ -918,7 +929,7 @@ class DMatrix: # pylint: disable=too-many-instance-attributes,too-many-public-m
it.proxy.handle,
reset_callback,
next_callback,
args_cstr,
args,
ctypes.byref(handle),
)
it.reraise()

View File

@@ -198,19 +198,20 @@ def skip_win() -> PytestSkip:
class IteratorForTest(xgb.core.DataIter):
"""Iterator for testing streaming DMatrix. (external memory, quantile)"""
def __init__(
def __init__( # pylint: disable=too-many-arguments
self,
X: Sequence,
y: Sequence,
w: Optional[Sequence],
cache: Optional[str],
on_host: bool = False,
) -> None:
assert len(X) == len(y)
self.X = X
self.y = y
self.w = w
self.it = 0
super().__init__(cache_prefix=cache)
super().__init__(cache_prefix=cache, on_host=on_host)
def next(self, input_data: Callable) -> int:
if self.it == len(self.X):
@@ -367,7 +368,11 @@ class TestDataset:
weight.append(w)
it = IteratorForTest(
predictor, response, weight if weight else None, cache="cache"
predictor,
response,
weight if weight else None,
cache="cache",
on_host=False,
)
return xgb.DMatrix(it)

View File

@@ -22,7 +22,7 @@ def run_mixed_sparsity(device: str) -> None:
X = [cp.array(batch) for batch in X]
it = tm.IteratorForTest(X, y, None, None)
it = tm.IteratorForTest(X, y, None, None, on_host=False)
Xy_0 = xgboost.QuantileDMatrix(it)
X_1, y_1 = tm.make_sparse_regression(256, 16, 0.1, True)

View File

@@ -207,6 +207,7 @@ def check_get_quantile_cut_device(tree_method: str, use_cupy: bool) -> None:
it = tm.IteratorForTest(
*tm.make_batches(n_samples_per_batch, n_features, n_batches, use_cupy),
cache="cache",
on_host=False,
)
Xy: xgb.DMatrix = xgb.DMatrix(it)
xgb.train({"tree_method": tree_method, "max_bin": max_bin}, Xyw)