Bump version

Define lazy isinstance for Python compat. (#5364) (#5369)
* Avoid importing datatable.
* Fix #5363.
2020-03-04 00:39:03 +00:00 · 2020-02-26 20:39:38 +08:00 · 2020-02-26 14:23:10 +08:00 · 2020-02-22 19:04:48 +08:00
9 changed files with 44 additions and 34 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,5 +1,5 @@
 cmake_minimum_required(VERSION 3.12)
-project(xgboost LANGUAGES CXX C VERSION 1.0.1)
+project(xgboost LANGUAGES CXX C VERSION 1.0.2)
 include(cmake/Utils.cmake)
 list(APPEND CMAKE_MODULE_PATH "${xgboost_SOURCE_DIR}/cmake/modules")
 cmake_policy(SET CMP0022 NEW)
--- a/python-package/xgboost/VERSION
+++ b/python-package/xgboost/VERSION
@@ -1 +1 @@
-1.0.1
+1.0.2
--- a/python-package/xgboost/compat.py
+++ b/python-package/xgboost/compat.py
@@ -79,6 +79,14 @@ else:
 # END NUMPY PATHLIB ATTRIBUTION
 ###############################################################################

+
+def lazy_isinstance(instance, module, name):
+    '''Use string representation to identify a type.'''
+    module = type(instance).__module__ == module
+    name = type(instance).__name__ == name
+    return module and name
+
+
 # pandas
 try:
    from pandas import DataFrame, Series
@@ -95,27 +103,6 @@ except ImportError:
    pandas_concat = None
    PANDAS_INSTALLED = False

-# dt
-try:
-    # Workaround for #4473, compatibility with dask
-    if sys.__stdin__ is not None and sys.__stdin__.closed:
-        sys.__stdin__ = None
-    import datatable
-
-    if hasattr(datatable, "Frame"):
-        DataTable = datatable.Frame
-    else:
-        DataTable = datatable.DataTable
-    DT_INSTALLED = True
-except ImportError:
-
-    # pylint: disable=too-few-public-methods
-    class DataTable(object):
-        """ dummy for datatable.DataTable """
-
-    DT_INSTALLED = False
-
-
 # cudf
 try:
    from cudf import DataFrame as CUDF_DataFrame
--- a/python-package/xgboost/core.py
+++ b/python-package/xgboost/core.py
@@ -19,9 +19,9 @@ import scipy.sparse

 from .compat import (
    STRING_TYPES, DataFrame, MultiIndex, Int64Index, py_str,
-    PANDAS_INSTALLED, DataTable,
-    CUDF_INSTALLED, CUDF_DataFrame, CUDF_Series, CUDF_MultiIndex,
-    os_fspath, os_PathLike)
+    PANDAS_INSTALLED, CUDF_INSTALLED,
+    CUDF_DataFrame, CUDF_Series, CUDF_MultiIndex,
+    os_fspath, os_PathLike, lazy_isinstance)
 from .libpath import find_lib_path

 # c_bst_ulong corresponds to bst_ulong defined in xgboost/c_api.h
@@ -319,7 +319,8 @@ DT_TYPE_MAPPER2 = {'bool': 'i', 'int': 'int', 'real': 'float'}
 def _maybe_dt_data(data, feature_names, feature_types,
                   meta=None, meta_type=None):
    """Validate feature names and types if data table"""
-    if not isinstance(data, DataTable):
+    if (not lazy_isinstance(data, 'datatable', 'Frame') and
+            not lazy_isinstance(data, 'datatable', 'DataTable')):
        return data, feature_names, feature_types

    if meta and data.shape[1] > 1:
@@ -470,7 +471,7 @@ class DMatrix(object):
            self._init_from_csc(data)
        elif isinstance(data, np.ndarray):
            self._init_from_npy2d(data, missing, nthread)
-        elif isinstance(data, DataTable):
+        elif lazy_isinstance(data, 'datatable', 'Frame'):
            self._init_from_dt(data, nthread)
        elif hasattr(data, "__cuda_array_interface__"):
            self._init_from_array_interface(data, missing, nthread)
@@ -1052,7 +1053,7 @@ class Booster(object):
            _check_call(
                _LIB.XGBoosterUnserializeFromBuffer(self.handle, ptr, length))
            self.__dict__.update(state)
-        elif isinstance(model_file, (STRING_TYPES, os_PathLike)):
+        elif isinstance(model_file, (STRING_TYPES, os_PathLike, bytearray)):
            self.load_model(model_file)
        elif model_file is None:
            pass
@@ -1512,7 +1513,8 @@ class Booster(object):
        return ctypes2buffer(cptr, length.value)

    def load_model(self, fname):
-        """Load the model from a file, local or as URI.
+        """Load the model from a file or bytearray. Path to file can be local
+        or as an URI.

        The model is loaded from an XGBoost format which is universal among the
        various XGBoost interfaces. Auxiliary attributes of the Python Booster
@@ -1530,6 +1532,12 @@ class Booster(object):
            # from URL.
            _check_call(_LIB.XGBoosterLoadModel(
                self.handle, c_str(os_fspath(fname))))
+        elif isinstance(fname, bytearray):
+            buf = fname
+            length = c_bst_ulong(len(buf))
+            ptr = (ctypes.c_char * len(buf)).from_buffer(buf)
+            _check_call(_LIB.XGBoosterLoadModelFromBuffer(self.handle, ptr,
+                                                          length))
        else:
            raise TypeError('Unknown file type: ', fname)

--- a/python-package/xgboost/training.py
+++ b/python-package/xgboost/training.py
@@ -38,7 +38,7 @@ def _train_internal(params, dtrain,

    _params = dict(params) if isinstance(params, list) else params

-    if 'num_parallel_tree' in _params and params[
+    if 'num_parallel_tree' in _params and _params[
            'num_parallel_tree'] is not None:
        num_parallel_tree = _params['num_parallel_tree']
        nboost //= num_parallel_tree
--- a/tests/python/test_basic.py
+++ b/tests/python/test_basic.py
@@ -35,6 +35,11 @@ def captured_output():


 class TestBasic(unittest.TestCase):
+    def test_compat(self):
+        from xgboost.compat import lazy_isinstance
+        a = np.array([1, 2, 3])
+        assert lazy_isinstance(a, 'numpy', 'ndarray')
+        assert not lazy_isinstance(a, 'numpy', 'dataframe')

    def test_basic(self):
        dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train')
--- a/tests/python/test_basic_models.py
+++ b/tests/python/test_basic_models.py
@@ -300,6 +300,13 @@ class TestModels(unittest.TestCase):
        assert float(config['learner']['objective'][
            'reg_loss_param']['scale_pos_weight']) == 0.5

+        buf = bst.save_raw()
+        from_raw = xgb.Booster()
+        from_raw.load_model(buf)
+
+        buf_from_raw = from_raw.save_raw()
+        assert buf == buf_from_raw
+
    def test_model_json_io(self):
        loc = locale.getpreferredencoding(False)
        model_path = 'test_model_json_io.json'
--- a/tests/python/test_with_sklearn.py
+++ b/tests/python/test_with_sklearn.py
@@ -34,7 +34,8 @@ def test_binary_classification():
    kf = KFold(n_splits=2, shuffle=True, random_state=rng)
    for cls in (xgb.XGBClassifier, xgb.XGBRFClassifier):
        for train_index, test_index in kf.split(X, y):
-            xgb_model = cls(random_state=42).fit(X[train_index], y[train_index])
+            clf = cls(random_state=42)
+            xgb_model = clf.fit(X[train_index], y[train_index], eval_metric=['auc', 'logloss'])
            preds = xgb_model.predict(X[test_index])
            labels = y[test_index]
            err = sum(1 for i in range(len(preds))
--- a/tests/python/testing.py
+++ b/tests/python/testing.py
@@ -1,5 +1,5 @@
 # coding: utf-8
-from xgboost.compat import SKLEARN_INSTALLED, PANDAS_INSTALLED, DT_INSTALLED
+from xgboost.compat import SKLEARN_INSTALLED, PANDAS_INSTALLED
 from xgboost.compat import CUDF_INSTALLED, DASK_INSTALLED


@@ -19,7 +19,9 @@ def no_pandas():


 def no_dt():
-    return {'condition': not DT_INSTALLED,
+    import importlib.util
+    spec = importlib.util.find_spec('datatable')
+    return {'condition': spec is None,
            'reason': 'Datatable is not installed.'}
Author	SHA1	Message	Date
Hyunsu Cho	917b0a7b46	Bump version	2020-03-04 00:39:03 +00:00
Jiaming Yuan	58ebbab979	Define lazy isinstance for Python compat. (#5364) (#5369) * Avoid importing datatable. * Fix #5363.	2020-02-26 20:39:38 +08:00
Jiaming Yuan	2bc5d8d449	Restore loading model from buffer. (#5360) (#5366)	2020-02-26 14:23:10 +08:00
Philip Hyunsu Cho	7d178cbd25	Fix a small typo in sklearn.py that broke multiple eval metrics (#5341)	2020-02-22 19:04:48 +08:00
@@ -1 +1 @@
-1.0.1
+1.0.2