[backport] Fix arrow compatibility, hypothesis tests. (#7979)
This commit is contained in:
parent
eefa1ddd8a
commit
645855e8b1
@ -506,7 +506,6 @@ def record_batch_data_iter(data_iter: Iterator) -> Callable:
|
|||||||
"""Data iterator used to ingest Arrow columnar record batches. We are not using
|
"""Data iterator used to ingest Arrow columnar record batches. We are not using
|
||||||
class DataIter because it is only intended for building Device DMatrix and external
|
class DataIter because it is only intended for building Device DMatrix and external
|
||||||
memory DMatrix.
|
memory DMatrix.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
from pyarrow.cffi import ffi
|
from pyarrow.cffi import ffi
|
||||||
|
|
||||||
@ -557,13 +556,7 @@ def _from_arrow(
|
|||||||
if enable_categorical:
|
if enable_categorical:
|
||||||
raise ValueError("categorical data in arrow is not supported yet.")
|
raise ValueError("categorical data in arrow is not supported yet.")
|
||||||
|
|
||||||
major, _, _ = version.StrictVersion(pa.__version__).version
|
|
||||||
if major == 4:
|
|
||||||
rb_iter = iter(data.to_batches())
|
rb_iter = iter(data.to_batches())
|
||||||
else:
|
|
||||||
# use_async=True to workaround pyarrow 6.0.1 hang,
|
|
||||||
# see Modin-3982 and ARROW-15362
|
|
||||||
rb_iter = iter(data.to_batches(use_async=True))
|
|
||||||
it = record_batch_data_iter(rb_iter)
|
it = record_batch_data_iter(rb_iter)
|
||||||
next_callback = ctypes.CFUNCTYPE(ctypes.c_int, ctypes.c_void_p)(it)
|
next_callback = ctypes.CFUNCTYPE(ctypes.c_int, ctypes.c_void_p)(it)
|
||||||
handle = ctypes.c_void_p()
|
handle = ctypes.c_void_p()
|
||||||
|
|||||||
@ -10,10 +10,10 @@ exact_parameter_strategy = strategies.fixed_dictionaries({
|
|||||||
'nthread': strategies.integers(1, 4),
|
'nthread': strategies.integers(1, 4),
|
||||||
'max_depth': strategies.integers(1, 11),
|
'max_depth': strategies.integers(1, 11),
|
||||||
'min_child_weight': strategies.floats(0.5, 2.0),
|
'min_child_weight': strategies.floats(0.5, 2.0),
|
||||||
'alpha': strategies.floats(0.0, 2.0),
|
'alpha': strategies.floats(1e-5, 2.0),
|
||||||
'lambda': strategies.floats(1e-5, 2.0),
|
'lambda': strategies.floats(1e-5, 2.0),
|
||||||
'eta': strategies.floats(0.01, 0.5),
|
'eta': strategies.floats(0.01, 0.5),
|
||||||
'gamma': strategies.floats(0.0, 2.0),
|
'gamma': strategies.floats(1e-5, 2.0),
|
||||||
'seed': strategies.integers(0, 10),
|
'seed': strategies.integers(0, 10),
|
||||||
# We cannot enable subsampling as the training loss can increase
|
# We cannot enable subsampling as the training loss can increase
|
||||||
# 'subsample': strategies.floats(0.5, 1.0),
|
# 'subsample': strategies.floats(0.5, 1.0),
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user