Fix categorical data with external memory. (#10433)

This commit is contained in:
Jiaming Yuan
2024-06-18 04:34:54 +08:00
committed by GitHub
parent a8ddbac163
commit b4cc350ec5
5 changed files with 31 additions and 7 deletions

View File

@@ -52,6 +52,21 @@ def test_single_batch(tree_method: str = "approx") -> None:
assert from_np.get_dump() == from_it.get_dump()
def test_with_cat_single() -> None:
    """Train on categorical data via ``SingleBatch`` and via plain arrays.

    The same ``X``/``y`` pair is wrapped once in a ``SingleBatch`` object and
    once passed directly to ``xgb.DMatrix``; both matrices are built with
    ``enable_categorical=True``.  The boosters trained on each must serialize
    to identical JSON.
    """
    X, y = tm.make_categorical(
        n_samples=128, n_features=3, n_categories=6, onehot=False
    )
    rounds = 3

    # Booster trained on the DMatrix constructed from the SingleBatch wrapper.
    dtrain = xgb.DMatrix(SingleBatch(data=X, label=y), enable_categorical=True)
    booster_from_iter = xgb.train({}, dtrain, num_boost_round=rounds)

    # Reference booster trained on the DMatrix constructed directly from X, y.
    dtrain = xgb.DMatrix(X, y, enable_categorical=True)
    booster_from_arrays = xgb.train({}, dtrain, num_boost_round=rounds)

    # Compare the full serialized models rather than just the text dumps.
    assert booster_from_iter.save_raw(
        raw_format="json"
    ) == booster_from_arrays.save_raw(raw_format="json")
def run_data_iterator(
n_samples_per_batch: int,
n_features: int,