[backport] Fix arrow compatibility, hypothesis tests. (#7979)

2022-06-07 01:47:45 +08:00 · 2022-06-07 01:47:45 +08:00 · 645855e8b1
commit 645855e8b1
parent eefa1ddd8a
2 changed files with 3 additions and 10 deletions
--- a/python-package/xgboost/data.py
+++ b/python-package/xgboost/data.py
@@ -506,7 +506,6 @@ def record_batch_data_iter(data_iter: Iterator) -> Callable:
    """Data iterator used to ingest Arrow columnar record batches. We are not using
    class DataIter because it is only intended for building Device DMatrix and external
    memory DMatrix.
-
    """
    from pyarrow.cffi import ffi

@@ -557,13 +556,7 @@ def _from_arrow(
    if enable_categorical:
        raise ValueError("categorical data in arrow is not supported yet.")

-    major, _, _ = version.StrictVersion(pa.__version__).version
-    if major == 4:
-        rb_iter = iter(data.to_batches())
+    rb_iter = iter(data.to_batches())
-    else:
-        # use_async=True to workaround pyarrow 6.0.1 hang,
-        # see Modin-3982 and ARROW-15362
-        rb_iter = iter(data.to_batches(use_async=True))
    it = record_batch_data_iter(rb_iter)
    next_callback = ctypes.CFUNCTYPE(ctypes.c_int, ctypes.c_void_p)(it)
    handle = ctypes.c_void_p()
--- a/tests/python/test_updaters.py
+++ b/tests/python/test_updaters.py
@@ -10,10 +10,10 @@ exact_parameter_strategy = strategies.fixed_dictionaries({
    'nthread': strategies.integers(1, 4),
    'max_depth': strategies.integers(1, 11),
    'min_child_weight': strategies.floats(0.5, 2.0),
-    'alpha': strategies.floats(0.0, 2.0),
+    'alpha': strategies.floats(1e-5, 2.0),
    'lambda': strategies.floats(1e-5, 2.0),
    'eta': strategies.floats(0.01, 0.5),
-    'gamma': strategies.floats(0.0, 2.0),
+    'gamma': strategies.floats(1e-5, 2.0),
    'seed': strategies.integers(0, 10),
    # We cannot enable subsampling as the training loss can increase
    # 'subsample': strategies.floats(0.5, 1.0),