Compare commits
4 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
917b0a7b46 | ||
|
|
58ebbab979 | ||
|
|
2bc5d8d449 | ||
|
|
7d178cbd25 |
@@ -1,5 +1,5 @@
|
|||||||
cmake_minimum_required(VERSION 3.12)
|
cmake_minimum_required(VERSION 3.12)
|
||||||
project(xgboost LANGUAGES CXX C VERSION 1.0.1)
|
project(xgboost LANGUAGES CXX C VERSION 1.0.2)
|
||||||
include(cmake/Utils.cmake)
|
include(cmake/Utils.cmake)
|
||||||
list(APPEND CMAKE_MODULE_PATH "${xgboost_SOURCE_DIR}/cmake/modules")
|
list(APPEND CMAKE_MODULE_PATH "${xgboost_SOURCE_DIR}/cmake/modules")
|
||||||
cmake_policy(SET CMP0022 NEW)
|
cmake_policy(SET CMP0022 NEW)
|
||||||
|
|||||||
@@ -1 +1 @@
|
|||||||
1.0.1
|
1.0.2
|
||||||
|
|||||||
@@ -79,6 +79,14 @@ else:
|
|||||||
# END NUMPY PATHLIB ATTRIBUTION
|
# END NUMPY PATHLIB ATTRIBUTION
|
||||||
###############################################################################
|
###############################################################################
|
||||||
|
|
||||||
|
|
||||||
|
def lazy_isinstance(instance, module, name):
    '''Use string representation to identify a type.

    The check compares the defining module and the class name of
    ``type(instance)`` against the given strings, so the package that
    provides the type never has to be imported here.

    Parameters
    ----------
    instance : object
        Object whose type is inspected.
    module : str
        Expected value of ``type(instance).__module__``.
    name : str
        Expected value of ``type(instance).__name__``.

    Returns
    -------
    bool
        True only when both the module and the class name match exactly.
        Unlike ``isinstance`` this looks at the concrete type only, so
        instances of subclasses do not match their base class.
    '''
    # Look the type up once and keep the arguments intact instead of
    # rebinding them to the comparison results.
    instance_type = type(instance)
    return instance_type.__module__ == module and instance_type.__name__ == name
|
||||||
|
|
||||||
|
|
||||||
# pandas
|
# pandas
|
||||||
try:
|
try:
|
||||||
from pandas import DataFrame, Series
|
from pandas import DataFrame, Series
|
||||||
@@ -95,27 +103,6 @@ except ImportError:
|
|||||||
pandas_concat = None
|
pandas_concat = None
|
||||||
PANDAS_INSTALLED = False
|
PANDAS_INSTALLED = False
|
||||||
|
|
||||||
# dt
|
|
||||||
try:
|
|
||||||
# Workaround for #4473, compatibility with dask
|
|
||||||
if sys.__stdin__ is not None and sys.__stdin__.closed:
|
|
||||||
sys.__stdin__ = None
|
|
||||||
import datatable
|
|
||||||
|
|
||||||
if hasattr(datatable, "Frame"):
|
|
||||||
DataTable = datatable.Frame
|
|
||||||
else:
|
|
||||||
DataTable = datatable.DataTable
|
|
||||||
DT_INSTALLED = True
|
|
||||||
except ImportError:
|
|
||||||
|
|
||||||
# pylint: disable=too-few-public-methods
|
|
||||||
class DataTable(object):
|
|
||||||
""" dummy for datatable.DataTable """
|
|
||||||
|
|
||||||
DT_INSTALLED = False
|
|
||||||
|
|
||||||
|
|
||||||
# cudf
|
# cudf
|
||||||
try:
|
try:
|
||||||
from cudf import DataFrame as CUDF_DataFrame
|
from cudf import DataFrame as CUDF_DataFrame
|
||||||
|
|||||||
@@ -19,9 +19,9 @@ import scipy.sparse
|
|||||||
|
|
||||||
from .compat import (
|
from .compat import (
|
||||||
STRING_TYPES, DataFrame, MultiIndex, Int64Index, py_str,
|
STRING_TYPES, DataFrame, MultiIndex, Int64Index, py_str,
|
||||||
PANDAS_INSTALLED, DataTable,
|
PANDAS_INSTALLED, CUDF_INSTALLED,
|
||||||
CUDF_INSTALLED, CUDF_DataFrame, CUDF_Series, CUDF_MultiIndex,
|
CUDF_DataFrame, CUDF_Series, CUDF_MultiIndex,
|
||||||
os_fspath, os_PathLike)
|
os_fspath, os_PathLike, lazy_isinstance)
|
||||||
from .libpath import find_lib_path
|
from .libpath import find_lib_path
|
||||||
|
|
||||||
# c_bst_ulong corresponds to bst_ulong defined in xgboost/c_api.h
|
# c_bst_ulong corresponds to bst_ulong defined in xgboost/c_api.h
|
||||||
@@ -319,7 +319,8 @@ DT_TYPE_MAPPER2 = {'bool': 'i', 'int': 'int', 'real': 'float'}
|
|||||||
def _maybe_dt_data(data, feature_names, feature_types,
|
def _maybe_dt_data(data, feature_names, feature_types,
|
||||||
meta=None, meta_type=None):
|
meta=None, meta_type=None):
|
||||||
"""Validate feature names and types if data table"""
|
"""Validate feature names and types if data table"""
|
||||||
if not isinstance(data, DataTable):
|
if (not lazy_isinstance(data, 'datatable', 'Frame') and
|
||||||
|
not lazy_isinstance(data, 'datatable', 'DataTable')):
|
||||||
return data, feature_names, feature_types
|
return data, feature_names, feature_types
|
||||||
|
|
||||||
if meta and data.shape[1] > 1:
|
if meta and data.shape[1] > 1:
|
||||||
@@ -470,7 +471,7 @@ class DMatrix(object):
|
|||||||
self._init_from_csc(data)
|
self._init_from_csc(data)
|
||||||
elif isinstance(data, np.ndarray):
|
elif isinstance(data, np.ndarray):
|
||||||
self._init_from_npy2d(data, missing, nthread)
|
self._init_from_npy2d(data, missing, nthread)
|
||||||
elif isinstance(data, DataTable):
|
elif lazy_isinstance(data, 'datatable', 'Frame'):
|
||||||
self._init_from_dt(data, nthread)
|
self._init_from_dt(data, nthread)
|
||||||
elif hasattr(data, "__cuda_array_interface__"):
|
elif hasattr(data, "__cuda_array_interface__"):
|
||||||
self._init_from_array_interface(data, missing, nthread)
|
self._init_from_array_interface(data, missing, nthread)
|
||||||
@@ -1052,7 +1053,7 @@ class Booster(object):
|
|||||||
_check_call(
|
_check_call(
|
||||||
_LIB.XGBoosterUnserializeFromBuffer(self.handle, ptr, length))
|
_LIB.XGBoosterUnserializeFromBuffer(self.handle, ptr, length))
|
||||||
self.__dict__.update(state)
|
self.__dict__.update(state)
|
||||||
elif isinstance(model_file, (STRING_TYPES, os_PathLike)):
|
elif isinstance(model_file, (STRING_TYPES, os_PathLike, bytearray)):
|
||||||
self.load_model(model_file)
|
self.load_model(model_file)
|
||||||
elif model_file is None:
|
elif model_file is None:
|
||||||
pass
|
pass
|
||||||
@@ -1512,7 +1513,8 @@ class Booster(object):
|
|||||||
return ctypes2buffer(cptr, length.value)
|
return ctypes2buffer(cptr, length.value)
|
||||||
|
|
||||||
def load_model(self, fname):
|
def load_model(self, fname):
|
||||||
"""Load the model from a file, local or as URI.
|
"""Load the model from a file or bytearray. Path to file can be local
|
||||||
|
or a URI.
|
||||||
|
|
||||||
The model is loaded from an XGBoost format which is universal among the
|
The model is loaded from an XGBoost format which is universal among the
|
||||||
various XGBoost interfaces. Auxiliary attributes of the Python Booster
|
various XGBoost interfaces. Auxiliary attributes of the Python Booster
|
||||||
@@ -1530,6 +1532,12 @@ class Booster(object):
|
|||||||
# from URL.
|
# from URL.
|
||||||
_check_call(_LIB.XGBoosterLoadModel(
|
_check_call(_LIB.XGBoosterLoadModel(
|
||||||
self.handle, c_str(os_fspath(fname))))
|
self.handle, c_str(os_fspath(fname))))
|
||||||
|
elif isinstance(fname, bytearray):
|
||||||
|
buf = fname
|
||||||
|
length = c_bst_ulong(len(buf))
|
||||||
|
ptr = (ctypes.c_char * len(buf)).from_buffer(buf)
|
||||||
|
_check_call(_LIB.XGBoosterLoadModelFromBuffer(self.handle, ptr,
|
||||||
|
length))
|
||||||
else:
|
else:
|
||||||
raise TypeError('Unknown file type: ', fname)
|
raise TypeError('Unknown file type: ', fname)
|
||||||
|
|
||||||
|
|||||||
@@ -38,7 +38,7 @@ def _train_internal(params, dtrain,
|
|||||||
|
|
||||||
_params = dict(params) if isinstance(params, list) else params
|
_params = dict(params) if isinstance(params, list) else params
|
||||||
|
|
||||||
if 'num_parallel_tree' in _params and params[
|
if 'num_parallel_tree' in _params and _params[
|
||||||
'num_parallel_tree'] is not None:
|
'num_parallel_tree'] is not None:
|
||||||
num_parallel_tree = _params['num_parallel_tree']
|
num_parallel_tree = _params['num_parallel_tree']
|
||||||
nboost //= num_parallel_tree
|
nboost //= num_parallel_tree
|
||||||
|
|||||||
@@ -35,6 +35,11 @@ def captured_output():
|
|||||||
|
|
||||||
|
|
||||||
class TestBasic(unittest.TestCase):
|
class TestBasic(unittest.TestCase):
|
||||||
|
def test_compat(self):
    """lazy_isinstance matches on the type's module and class name."""
    from xgboost.compat import lazy_isinstance

    arr = np.array([1, 2, 3])
    # Module and class name both match.
    assert lazy_isinstance(arr, 'numpy', 'ndarray')
    # Right module, wrong class name.
    assert not lazy_isinstance(arr, 'numpy', 'dataframe')
|
||||||
|
|
||||||
def test_basic(self):
|
def test_basic(self):
|
||||||
dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train')
|
dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train')
|
||||||
|
|||||||
@@ -300,6 +300,13 @@ class TestModels(unittest.TestCase):
|
|||||||
assert float(config['learner']['objective'][
|
assert float(config['learner']['objective'][
|
||||||
'reg_loss_param']['scale_pos_weight']) == 0.5
|
'reg_loss_param']['scale_pos_weight']) == 0.5
|
||||||
|
|
||||||
|
buf = bst.save_raw()
|
||||||
|
from_raw = xgb.Booster()
|
||||||
|
from_raw.load_model(buf)
|
||||||
|
|
||||||
|
buf_from_raw = from_raw.save_raw()
|
||||||
|
assert buf == buf_from_raw
|
||||||
|
|
||||||
def test_model_json_io(self):
|
def test_model_json_io(self):
|
||||||
loc = locale.getpreferredencoding(False)
|
loc = locale.getpreferredencoding(False)
|
||||||
model_path = 'test_model_json_io.json'
|
model_path = 'test_model_json_io.json'
|
||||||
|
|||||||
@@ -34,7 +34,8 @@ def test_binary_classification():
|
|||||||
kf = KFold(n_splits=2, shuffle=True, random_state=rng)
|
kf = KFold(n_splits=2, shuffle=True, random_state=rng)
|
||||||
for cls in (xgb.XGBClassifier, xgb.XGBRFClassifier):
|
for cls in (xgb.XGBClassifier, xgb.XGBRFClassifier):
|
||||||
for train_index, test_index in kf.split(X, y):
|
for train_index, test_index in kf.split(X, y):
|
||||||
xgb_model = cls(random_state=42).fit(X[train_index], y[train_index])
|
clf = cls(random_state=42)
|
||||||
|
xgb_model = clf.fit(X[train_index], y[train_index], eval_metric=['auc', 'logloss'])
|
||||||
preds = xgb_model.predict(X[test_index])
|
preds = xgb_model.predict(X[test_index])
|
||||||
labels = y[test_index]
|
labels = y[test_index]
|
||||||
err = sum(1 for i in range(len(preds))
|
err = sum(1 for i in range(len(preds))
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from xgboost.compat import SKLEARN_INSTALLED, PANDAS_INSTALLED, DT_INSTALLED
|
from xgboost.compat import SKLEARN_INSTALLED, PANDAS_INSTALLED
|
||||||
from xgboost.compat import CUDF_INSTALLED, DASK_INSTALLED
|
from xgboost.compat import CUDF_INSTALLED, DASK_INSTALLED
|
||||||
|
|
||||||
|
|
||||||
@@ -19,7 +19,9 @@ def no_pandas():
|
|||||||
|
|
||||||
|
|
||||||
def no_dt():
|
def no_dt():
|
||||||
return {'condition': not DT_INSTALLED,
|
import importlib.util
|
||||||
|
spec = importlib.util.find_spec('datatable')
|
||||||
|
return {'condition': spec is None,
|
||||||
'reason': 'Datatable is not installed.'}
|
'reason': 'Datatable is not installed.'}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user