From 645855e8b1c932b54f33dfea342177e016bee335 Mon Sep 17 00:00:00 2001
From: Jiaming Yuan
Date: Tue, 7 Jun 2022 01:47:45 +0800
Subject: [PATCH] [backport] Fix arrow compatibility, hypothesis tests. (#7979)

---
 python-package/xgboost/data.py | 9 +--------
 tests/python/test_updaters.py  | 4 ++--
 2 files changed, 3 insertions(+), 10 deletions(-)

diff --git a/python-package/xgboost/data.py b/python-package/xgboost/data.py
index 119b354fc..0fe56b663 100644
--- a/python-package/xgboost/data.py
+++ b/python-package/xgboost/data.py
@@ -506,7 +506,6 @@ def record_batch_data_iter(data_iter: Iterator) -> Callable:
     """Data iterator used to ingest Arrow columnar record batches. We are not using
     class DataIter because it is only intended for building Device DMatrix and external
     memory DMatrix.
-
     """
     from pyarrow.cffi import ffi
 
@@ -557,13 +556,7 @@ def _from_arrow(
     if enable_categorical:
         raise ValueError("categorical data in arrow is not supported yet.")
 
-    major, _, _ = version.StrictVersion(pa.__version__).version
-    if major == 4:
-        rb_iter = iter(data.to_batches())
-    else:
-        # use_async=True to workaround pyarrow 6.0.1 hang,
-        # see Modin-3982 and ARROW-15362
-        rb_iter = iter(data.to_batches(use_async=True))
+    rb_iter = iter(data.to_batches())
     it = record_batch_data_iter(rb_iter)
     next_callback = ctypes.CFUNCTYPE(ctypes.c_int, ctypes.c_void_p)(it)
     handle = ctypes.c_void_p()
diff --git a/tests/python/test_updaters.py b/tests/python/test_updaters.py
index ff40025af..f4d75b3fb 100644
--- a/tests/python/test_updaters.py
+++ b/tests/python/test_updaters.py
@@ -10,10 +10,10 @@ exact_parameter_strategy = strategies.fixed_dictionaries({
     'nthread': strategies.integers(1, 4),
     'max_depth': strategies.integers(1, 11),
     'min_child_weight': strategies.floats(0.5, 2.0),
-    'alpha': strategies.floats(0.0, 2.0),
+    'alpha': strategies.floats(1e-5, 2.0),
     'lambda': strategies.floats(1e-5, 2.0),
     'eta': strategies.floats(0.01, 0.5),
-    'gamma': strategies.floats(0.0, 2.0),
+    'gamma': strategies.floats(1e-5, 2.0),
     'seed': strategies.integers(0, 10),
     # We cannot enable subsampling as the training loss can increase
     # 'subsample': strategies.floats(0.5, 1.0),
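
Note (not part of the patch): the test change above narrows the lower bound of 'alpha' and 'gamma' from 0.0 to 1e-5, so hypothesis never draws an exact zero for these regularisation parameters. The sketch below shows how a strategy like the patched exact_parameter_strategy is typically consumed in a property-based updater test; the synthetic dataset, the test name, and the training call are illustrative assumptions, not the actual contents of tests/python/test_updaters.py.

# Hypothetical companion sketch; dataset, test name, and assertions are assumptions.
import numpy as np
import xgboost as xgb
from hypothesis import given, settings, strategies

# Strategy mirroring the patched exact_parameter_strategy: lower bounds of
# 1e-5 keep hypothesis from generating an exact 0.0 for alpha/gamma.
exact_parameter_strategy = strategies.fixed_dictionaries({
    'nthread': strategies.integers(1, 4),
    'max_depth': strategies.integers(1, 11),
    'min_child_weight': strategies.floats(0.5, 2.0),
    'alpha': strategies.floats(1e-5, 2.0),
    'lambda': strategies.floats(1e-5, 2.0),
    'eta': strategies.floats(0.01, 0.5),
    'gamma': strategies.floats(1e-5, 2.0),
    'seed': strategies.integers(0, 10),
})

@given(exact_parameter_strategy)
@settings(deadline=None)
def test_exact_updater_sketch(params):
    # Small synthetic regression problem; the real test suite uses its own
    # datasets and stronger assertions.
    rng = np.random.RandomState(params['seed'])
    X, y = rng.randn(128, 8), rng.randn(128)
    dtrain = xgb.DMatrix(X, label=y)
    booster = xgb.train({**params, 'tree_method': 'exact'}, dtrain,
                        num_boost_round=4)
    assert booster.num_boosted_rounds() == 4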