[backport] Fix arrow compatibility, hypothesis tests. (#7979)

2022-06-07 01:47:45 +08:00 · 2022-06-07 01:47:45 +08:00 · 645855e8b1
commit 645855e8b1
parent eefa1ddd8a
2 changed files with 3 additions and 10 deletions
--- a/python-package/xgboost/data.py
+++ b/python-package/xgboost/data.py
@@ -506,7 +506,6 @@ def record_batch_data_iter(data_iter: Iterator) -> Callable:
    """Data iterator used to ingest Arrow columnar record batches. We are not using
    class DataIter because it is only intended for building Device DMatrix and external
    memory DMatrix.
    """
    from pyarrow.cffi import ffi
@@ -557,13 +556,7 @@ def _from_arrow(
    if enable_categorical:
        raise ValueError("categorical data in arrow is not supported yet.")
-    major, _, _ = version.StrictVersion(pa.__version__).version
-    if major == 4:
-        rb_iter = iter(data.to_batches())
-    else:
-        # use_async=True to workaround pyarrow 6.0.1 hang,
-        # see Modin-3982 and ARROW-15362
-        rb_iter = iter(data.to_batches(use_async=True))
+    rb_iter = iter(data.to_batches())
    it = record_batch_data_iter(rb_iter)
    next_callback = ctypes.CFUNCTYPE(ctypes.c_int, ctypes.c_void_p)(it)
    handle = ctypes.c_void_p()
--- a/tests/python/test_updaters.py
+++ b/tests/python/test_updaters.py
@@ -10,10 +10,10 @@ exact_parameter_strategy = strategies.fixed_dictionaries({
    'nthread': strategies.integers(1, 4),
    'max_depth': strategies.integers(1, 11),
    'min_child_weight': strategies.floats(0.5, 2.0),
-    'alpha': strategies.floats(0.0, 2.0),
+    'alpha': strategies.floats(1e-5, 2.0),
    'lambda': strategies.floats(1e-5, 2.0),
    'eta': strategies.floats(0.01, 0.5),
-    'gamma': strategies.floats(0.0, 2.0),
+    'gamma': strategies.floats(1e-5, 2.0),
    'seed': strategies.integers(0, 10),
    # We cannot enable subsampling as the training loss can increase
    # 'subsample': strategies.floats(0.5, 1.0),