Use weakref instead of id for DataIter cache. (#9445)
- Fix the case where Python reuses the `id` of a freed object.
- Small optimization to the column matrix with QuantileDMatrix (QDM): use `realloc` instead of copying data.
This commit is contained in:
@@ -1,4 +1,5 @@
|
||||
from typing import Callable, Dict, List
|
||||
import weakref
|
||||
from typing import Any, Callable, Dict, List
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
@@ -179,5 +180,18 @@ def test_data_cache() -> None:
|
||||
data = make_batches(n_samples_per_batch, n_features, n_batches, False)
|
||||
batches = [v[0] for v in data]
|
||||
it = IterForCacheTest(*batches)
|
||||
transform = xgb.data._proxy_transform
|
||||
|
||||
called = 0
|
||||
|
||||
def mock(*args: Any, **kwargs: Any) -> Any:
|
||||
nonlocal called
|
||||
called += 1
|
||||
return transform(*args, **kwargs)
|
||||
|
||||
xgb.data._proxy_transform = mock
|
||||
xgb.QuantileDMatrix(it)
|
||||
assert it._input_id == id(batches[0])
|
||||
assert it._data_ref is weakref.ref(batches[0])
|
||||
assert called == 1
|
||||
|
||||
xgb.data._proxy_transform = transform
|
||||
|
||||
@@ -103,12 +103,29 @@ class TestQuantileDMatrix:
|
||||
*make_batches_sparse(
|
||||
n_samples_per_batch, n_features, n_batches, sparsity
|
||||
),
|
||||
None
|
||||
None,
|
||||
)
|
||||
Xy = xgb.QuantileDMatrix(it)
|
||||
assert Xy.num_row() == n_samples_per_batch * n_batches
|
||||
assert Xy.num_col() == n_features
|
||||
|
||||
def test_different_size(self) -> None:
    """Check a QuantileDMatrix built from batches made with ``vary_size=True``.

    The matrix constructed from the iterator must report the expected
    number of rows and must satisfy ``predictor_equal`` against a matrix
    constructed directly from the arrays returned by ``it.as_arrays()``.
    """
    n_samples_per_batch, n_features, n_batches = 317, 8, 7

    batches = make_batches(
        n_samples_per_batch, n_features, n_batches, False, vary_size=True
    )
    it = IteratorForTest(*batches, cache=None)

    Xy = xgb.QuantileDMatrix(it)
    # Expected total row count over all batches.
    # NOTE(review): presumably 317 * 7 plus the per-batch growth applied by
    # vary_size -- confirm against make_batches.
    assert Xy.num_row() == 2429

    # Rebuild the matrix from the iterator's arrays and compare both.
    X, y, w = it.as_arrays()
    Xy1 = xgb.QuantileDMatrix(X, y, weight=w)
    assert predictor_equal(Xy, Xy1)
|
||||
|
||||
@pytest.mark.parametrize("sparsity", [0.0, 0.1, 0.5, 0.8, 0.9])
|
||||
def test_training(self, sparsity: float) -> None:
|
||||
n_samples_per_batch = 317
|
||||
@@ -123,7 +140,7 @@ class TestQuantileDMatrix:
|
||||
*make_batches_sparse(
|
||||
n_samples_per_batch, n_features, n_batches, sparsity
|
||||
),
|
||||
None
|
||||
None,
|
||||
)
|
||||
|
||||
parameters = {"tree_method": "hist", "max_bin": 256}
|
||||
|
||||
Reference in New Issue
Block a user