diff --git a/python-package/xgboost/compat.py b/python-package/xgboost/compat.py index f2eadfc99..30a51ec68 100644 --- a/python-package/xgboost/compat.py +++ b/python-package/xgboost/compat.py @@ -19,67 +19,6 @@ def py_str(x): return x.decode('utf-8') -############################################################################### -# START NUMPY PATHLIB ATTRIBUTION -############################################################################### -# os.PathLike compatibility used in Numpy: -# https://github.com/numpy/numpy/tree/v1.17.0 -# Attribution: -# https://github.com/numpy/numpy/blob/v1.17.0/numpy/compat/py3k.py#L188-L247 -# Backport os.fs_path, os.PathLike, and PurePath.__fspath__ -if sys.version_info[:2] >= (3, 6): - os_fspath = os.fspath - os_PathLike = os.PathLike -else: - def _PurePath__fspath__(self): - return str(self) - - class os_PathLike(abc.ABC): - """Abstract base class for implementing the file system path protocol.""" - - @abc.abstractmethod - def __fspath__(self): - """Return the file system path representation of the object.""" - raise NotImplementedError - - @classmethod - def __subclasshook__(cls, subclass): - if issubclass(subclass, PurePath): - return True - return hasattr(subclass, '__fspath__') - - def os_fspath(path): - """Return the path representation of a path-like object. - If str or bytes is passed in, it is returned unchanged. Otherwise the - os.PathLike interface is used to get the path representation. If the - path representation is not str or bytes, TypeError is raised. If the - provided path is not str, bytes, or os.PathLike, TypeError is raised. - """ - if isinstance(path, (str, bytes)): - return path - - # Work from the object's type to match method resolution of other magic - # methods. - path_type = type(path) - try: - path_repr = path_type.__fspath__(path) - except AttributeError as e: - if hasattr(path_type, '__fspath__'): - raise - if issubclass(path_type, PurePath): - return _PurePath__fspath__(path) - raise TypeError("expected str, bytes or os.PathLike object, " - "not " + path_type.__name__) from e - if isinstance(path_repr, (str, bytes)): - return path_repr - raise TypeError("expected {}.__fspath__() to return str or bytes, " - "not {}".format(path_type.__name__, - type(path_repr).__name__)) -############################################################################### -# END NUMPY PATHLIB ATTRIBUTION -############################################################################### - - def lazy_isinstance(instance, module, name): '''Use string representation to identify a type.''' module = type(instance).__module__ == module @@ -167,26 +106,9 @@ except ImportError: # dask try: import dask - from dask import delayed - from dask import dataframe as dd - from dask import array as da - from dask.distributed import Client, get_client - from dask.distributed import comm as distributed_comm - from dask.distributed import wait as distributed_wait - from distributed import get_worker as distributed_get_worker - DASK_INSTALLED = True except ImportError: - dd = None - da = None - Client = None - delayed = None - get_client = None - distributed_comm = None - distributed_wait = None - distributed_get_worker = None dask = None - DASK_INSTALLED = False diff --git a/python-package/xgboost/core.py b/python-package/xgboost/core.py index 87d5c3aec..ceba043f2 100644 --- a/python-package/xgboost/core.py +++ b/python-package/xgboost/core.py @@ -16,10 +16,8 @@ import warnings import numpy as np import scipy.sparse -from .compat import ( - STRING_TYPES, DataFrame, py_str, - PANDAS_INSTALLED, - os_fspath, os_PathLike, lazy_isinstance) +from .compat import (STRING_TYPES, DataFrame, py_str, PANDAS_INSTALLED, + lazy_isinstance) from .libpath import find_lib_path # c_bst_ulong corresponds to bst_ulong defined in xgboost/c_api.h @@ -590,7 +588,7 @@ class DMatrix: # pylint: disable=too-many-instance-attributes If set, the output is suppressed. """ _check_call(_LIB.XGDMatrixSaveBinary(self.handle, - c_str(os_fspath(fname)), + c_str(os.fspath(fname)), ctypes.c_int(silent))) def set_label(self, label): @@ -982,7 +980,7 @@ class Booster(object): _check_call( _LIB.XGBoosterUnserializeFromBuffer(self.handle, ptr, length)) self.__dict__.update(state) - elif isinstance(model_file, (STRING_TYPES, os_PathLike, bytearray)): + elif isinstance(model_file, (STRING_TYPES, os.PathLike, bytearray)): self.load_model(model_file) elif model_file is None: pass @@ -1582,11 +1580,11 @@ class Booster(object): Output file name """ - if isinstance(fname, (STRING_TYPES, os_PathLike)): # assume file name + if isinstance(fname, (STRING_TYPES, os.PathLike)): # assume file name _check_call(_LIB.XGBoosterSaveModel( - self.handle, c_str(os_fspath(fname)))) + self.handle, c_str(os.fspath(fname)))) else: - raise TypeError("fname must be a string or os_PathLike") + raise TypeError("fname must be a string or os PathLike") def save_raw(self): """Save the model to a in memory buffer representation instead of file. @@ -1620,11 +1618,11 @@ class Booster(object): Input file name or memory buffer(see also save_raw) """ - if isinstance(fname, (STRING_TYPES, os_PathLike)): + if isinstance(fname, (STRING_TYPES, os.PathLike)): # assume file name, cannot use os.path.exist to check, file can be # from URL. _check_call(_LIB.XGBoosterLoadModel( - self.handle, c_str(os_fspath(fname)))) + self.handle, c_str(os.fspath(fname)))) elif isinstance(fname, bytearray): buf = fname length = c_bst_ulong(len(buf)) @@ -1650,8 +1648,8 @@ class Booster(object): dump_format : string, optional Format of model dump file. Can be 'text' or 'json'. """ - if isinstance(fout, (STRING_TYPES, os_PathLike)): - fout = open(os_fspath(fout), 'w') + if isinstance(fout, (STRING_TYPES, os.PathLike)): + fout = open(os.fspath(fout), 'w') need_close = True else: need_close = False @@ -1685,7 +1683,7 @@ class Booster(object): Format of model dump. Can be 'text', 'json' or 'dot'. """ - fmap = os_fspath(fmap) + fmap = os.fspath(fmap) length = c_bst_ulong() sarr = ctypes.POINTER(ctypes.c_char_p)() if self.feature_names is not None and fmap == '': @@ -1765,7 +1763,7 @@ class Booster(object): importance_type: str, default 'weight' One of the importance types defined above. """ - fmap = os_fspath(fmap) + fmap = os.fspath(fmap) if getattr(self, 'booster', None) is not None and self.booster not in {'gbtree', 'dart'}: raise ValueError('Feature importance is not defined for Booster type {}' .format(self.booster)) @@ -1858,7 +1856,7 @@ class Booster(object): The name of feature map file. """ # pylint: disable=too-many-locals - fmap = os_fspath(fmap) + fmap = os.fspath(fmap) if not PANDAS_INSTALLED: raise Exception(('pandas must be available to use this method.' 'Install pandas before calling again.')) diff --git a/python-package/xgboost/dask.py b/python-package/xgboost/dask.py index 53172b19a..6286e2c21 100644 --- a/python-package/xgboost/dask.py +++ b/python-package/xgboost/dask.py @@ -24,8 +24,6 @@ import numpy from . import rabit from .compat import DASK_INSTALLED -from .compat import distributed_get_worker, distributed_wait, distributed_comm -from .compat import da, dd, delayed, get_client from .compat import sparse, scipy_sparse from .compat import PANDAS_INSTALLED, DataFrame, Series, pandas_concat from .compat import CUDF_concat @@ -38,9 +36,22 @@ from .sklearn import XGBModel, XGBRegressorBase, XGBClassifierBase from .sklearn import xgboost_model_doc try: - from distributed import Client + from dask.distributed import Client, get_client + from dask.distributed import comm as distributed_comm + from dask.distributed import wait as distributed_wait + from dask.distributed import get_worker as distributed_get_worker + from dask import dataframe as dd + from dask import array as da + from dask import delayed except ImportError: Client = None + get_client = None + distributed_comm = None + distributed_wait = None + distributed_get_worker = None + dd = None + da = None + delayed = None # Current status is considered as initial support, many features are # not properly supported yet. @@ -83,6 +94,9 @@ def _assert_dask_support(): if not DASK_INSTALLED: raise ImportError( 'Dask needs to be installed in order to use this module') + if not distributed_wait: + raise ImportError( + 'distributed needs to be installed in order to use this module.') if platform.system() == 'Windows': msg = 'Windows is not officially supported for dask/xgboost,' msg += ' contribution are welcomed.' diff --git a/python-package/xgboost/data.py b/python-package/xgboost/data.py index 8bcf0bb20..2705249b7 100644 --- a/python-package/xgboost/data.py +++ b/python-package/xgboost/data.py @@ -4,12 +4,13 @@ import ctypes import json import warnings +import os import numpy as np from .core import c_array, _LIB, _check_call, c_str from .core import DataIter, DeviceQuantileDMatrix, DMatrix -from .compat import lazy_isinstance, os_fspath, os_PathLike +from .compat import lazy_isinstance c_bst_ulong = ctypes.c_uint64 # pylint: disable=invalid-name @@ -478,13 +479,13 @@ def _from_dlpack(data, missing, nthread, feature_names, feature_types): def _is_uri(data): - return isinstance(data, (str, os_PathLike)) + return isinstance(data, (str, os.PathLike)) def _from_uri(data, missing, feature_names, feature_types): _warn_unused_missing(data, missing) handle = ctypes.c_void_p() - _check_call(_LIB.XGDMatrixCreateFromFile(c_str(os_fspath(data)), + _check_call(_LIB.XGDMatrixCreateFromFile(c_str(os.fspath(data)), ctypes.c_int(1), ctypes.byref(handle))) return handle, feature_names, feature_types diff --git a/tests/python/test_basic.py b/tests/python/test_basic.py index f003e99b6..1e3505788 100644 --- a/tests/python/test_basic.py +++ b/tests/python/test_basic.py @@ -248,7 +248,7 @@ class TestBasic(unittest.TestCase): class TestBasicPathLike(unittest.TestCase): - """Unit tests using the os_fspath and pathlib.Path for file interaction.""" + """Unit tests using pathlib.Path for file interaction.""" def test_DMatrix_init_from_path(self): """Initialization from the data path.""" @@ -312,24 +312,8 @@ class TestBasicPathLike(unittest.TestCase): # load again using load_model bst3 = xgb.Booster() bst3.load_model(save_path) - dump3= bst3.get_dump() + dump3 = bst3.get_dump() dump_assertions(dump3) # remove file Path.unlink(save_path) - - def test_os_fspath(self): - """Core properties of the os_fspath function.""" - # strings are returned unmodified - assert '' == xgb.compat.os_fspath('') - assert '/this/path' == xgb.compat.os_fspath('/this/path') - - # bytes are returned unmodified - assert b'/this/path' == xgb.compat.os_fspath(b'/this/path') - - # path objects are returned as string representation - path_test = Path('this') / 'path' - assert str(path_test) == xgb.compat.os_fspath(path_test) - - # invalid values raise Type error - self.assertRaises(TypeError, xgb.compat.os_fspath, 123)