Use weakref instead of id for DataIter cache. (#9445)
- Fix the case where Python reuses the id of a freed object.
- Small optimization to the column matrix with QDM: use `realloc` instead of copying data.
This commit is contained in:
@@ -22,7 +22,10 @@ import xgboost
|
||||
|
||||
|
||||
def make_batches(
|
||||
n_samples_per_batch: int, n_features: int, n_batches: int, tmpdir: str,
|
||||
n_samples_per_batch: int,
|
||||
n_features: int,
|
||||
n_batches: int,
|
||||
tmpdir: str,
|
||||
) -> List[Tuple[str, str]]:
|
||||
files: List[Tuple[str, str]] = []
|
||||
rng = np.random.RandomState(1994)
|
||||
@@ -38,6 +41,7 @@ def make_batches(
|
||||
|
||||
class Iterator(xgboost.DataIter):
|
||||
"""A custom iterator for loading files in batches."""
|
||||
|
||||
def __init__(self, file_paths: List[Tuple[str, str]]):
|
||||
self._file_paths = file_paths
|
||||
self._it = 0
|
||||
|
||||
Reference in New Issue
Block a user