Use weakref instead of id for DataIter cache. (#9445)

- Fix case where Python reuses id from freed objects.
- Small optimization to column matrix with QDM by using `realloc` instead of copying data.
This commit is contained in:
Jiaming Yuan
2023-08-10 00:40:06 +08:00
committed by GitHub
parent d495a180d8
commit f05a23b41c
14 changed files with 193 additions and 63 deletions

View File

@@ -22,7 +22,10 @@ import xgboost
def make_batches(
n_samples_per_batch: int, n_features: int, n_batches: int, tmpdir: str,
n_samples_per_batch: int,
n_features: int,
n_batches: int,
tmpdir: str,
) -> List[Tuple[str, str]]:
files: List[Tuple[str, str]] = []
rng = np.random.RandomState(1994)
@@ -38,6 +41,7 @@ def make_batches(
class Iterator(xgboost.DataIter):
"""A custom iterator for loading files in batches."""
def __init__(self, file_paths: List[Tuple[str, str]]):
self._file_paths = file_paths
self._it = 0