Support more scipy types. (#9881)
This commit is contained in:
parent
cd473c9da3
commit
1aa8c8d9be
@ -57,12 +57,23 @@ def _check_data_shape(data: DataType) -> None:
|
||||
raise ValueError("Please reshape the input data into 2-dimensional matrix.")
|
||||
|
||||
|
||||
def is_scipy_csr(data: DataType) -> bool:
    """Predicate for scipy CSR input.

    Returns True when ``data`` is an instance of ``scipy.sparse.csr_array`` or
    ``scipy.sparse.csr_matrix``, and False otherwise, including when the
    classes cannot be imported.
    """
    is_array = False
    is_matrix = False
    # Each class is imported in its own try block, so an ImportError for one
    # of them does not skip the check against the other.
    try:
        from scipy.sparse import csr_array

        is_array = isinstance(data, csr_array)
    except ImportError:
        pass
    try:
        from scipy.sparse import csr_matrix

        is_matrix = isinstance(data, csr_matrix)
    except ImportError:
        pass
    return is_array or is_matrix


# Backward-compatible alias for the previous private name of this predicate.
_is_scipy_csr = is_scipy_csr
|
||||
|
||||
|
||||
def _array_interface_dict(data: np.ndarray) -> dict:
|
||||
@ -135,12 +146,23 @@ def _from_scipy_csr(
|
||||
return handle, feature_names, feature_types
|
||||
|
||||
|
||||
def is_scipy_csc(data: DataType) -> bool:
    """Predicate for scipy CSC input.

    Returns True when ``data`` is an instance of ``scipy.sparse.csc_array`` or
    ``scipy.sparse.csc_matrix``, and False otherwise, including when the
    classes cannot be imported.
    """
    is_array = False
    is_matrix = False
    # Each class is imported in its own try block, so an ImportError for one
    # of them does not skip the check against the other.
    try:
        from scipy.sparse import csc_array

        is_array = isinstance(data, csc_array)
    except ImportError:
        pass
    try:
        from scipy.sparse import csc_matrix

        is_matrix = isinstance(data, csc_matrix)
    except ImportError:
        pass
    return is_array or is_matrix


# Backward-compatible alias for the previous private name of this predicate.
_is_scipy_csc = is_scipy_csc
|
||||
|
||||
|
||||
def _from_scipy_csc(
|
||||
@ -171,12 +193,23 @@ def _from_scipy_csc(
|
||||
return handle, feature_names, feature_types
|
||||
|
||||
|
||||
def is_scipy_coo(data: DataType) -> bool:
    """Predicate for scipy COO input.

    Returns True when ``data`` is an instance of ``scipy.sparse.coo_array`` or
    ``scipy.sparse.coo_matrix``, and False otherwise, including when the
    classes cannot be imported.
    """
    is_array = False
    is_matrix = False
    # Each class is imported in its own try block, so an ImportError for one
    # of them does not skip the check against the other.
    try:
        from scipy.sparse import coo_array

        is_array = isinstance(data, coo_array)
    except ImportError:
        pass
    try:
        from scipy.sparse import coo_matrix

        is_matrix = isinstance(data, coo_matrix)
    except ImportError:
        pass
    return is_array or is_matrix


# Backward-compatible alias for the previous private name of this predicate.
_is_scipy_coo = is_scipy_coo
|
||||
|
||||
|
||||
def _is_np_array_like(data: DataType) -> bool:
|
||||
@ -1138,15 +1171,15 @@ def dispatch_data_backend(
|
||||
"""Dispatch data for DMatrix."""
|
||||
if not _is_cudf_ser(data) and not _is_pandas_series(data):
|
||||
_check_data_shape(data)
|
||||
if _is_scipy_csr(data):
|
||||
if is_scipy_csr(data):
|
||||
return _from_scipy_csr(
|
||||
data, missing, threads, feature_names, feature_types, data_split_mode
|
||||
)
|
||||
if _is_scipy_csc(data):
|
||||
if is_scipy_csc(data):
|
||||
return _from_scipy_csc(
|
||||
data, missing, threads, feature_names, feature_types, data_split_mode
|
||||
)
|
||||
if _is_scipy_coo(data):
|
||||
if is_scipy_coo(data):
|
||||
return _from_scipy_csr(
|
||||
data.tocsr(),
|
||||
missing,
|
||||
@ -1396,9 +1429,15 @@ def _proxy_transform(
|
||||
if _is_np_array_like(data):
|
||||
data, _ = _ensure_np_dtype(data, data.dtype)
|
||||
return data, None, feature_names, feature_types
|
||||
if _is_scipy_csr(data):
|
||||
if is_scipy_csr(data):
|
||||
data = transform_scipy_sparse(data, True)
|
||||
return data, None, feature_names, feature_types
|
||||
if is_scipy_csc(data):
|
||||
data = transform_scipy_sparse(data.tocsr(), True)
|
||||
return data, None, feature_names, feature_types
|
||||
if is_scipy_coo(data):
|
||||
data = transform_scipy_sparse(data.tocsr(), True)
|
||||
return data, None, feature_names, feature_types
|
||||
if _is_pandas_series(data):
|
||||
import pandas as pd
|
||||
|
||||
@ -1451,7 +1490,7 @@ def dispatch_proxy_set_data(
|
||||
_check_data_shape(data)
|
||||
proxy._set_data_from_array(data) # pylint: disable=W0212
|
||||
return
|
||||
if _is_scipy_csr(data):
|
||||
if is_scipy_csr(data):
|
||||
proxy._set_data_from_csr(data) # pylint: disable=W0212
|
||||
return
|
||||
raise err
|
||||
|
||||
@ -112,39 +112,6 @@ class TestDMatrix:
|
||||
with pytest.raises(ValueError):
|
||||
xgb.DMatrix(data)
|
||||
|
||||
def test_csr(self):
    """A 3x3 scipy ``csr_matrix`` yields a DMatrix with 3 rows and 3 columns."""
    values = np.array([1, 2, 3, 4, 5, 6])
    col_indices = np.array([0, 2, 2, 0, 1, 2])
    row_ptr = np.array([0, 2, 3, 6])
    sparse = scipy.sparse.csr_matrix((values, col_indices, row_ptr), shape=(3, 3))

    dmat = xgb.DMatrix(sparse)
    n_rows = dmat.num_row()
    assert n_rows == 3
    n_cols = dmat.num_col()
    assert n_cols == 3
|
||||
|
||||
def test_csc(self):
    """Check DMatrix construction from a scipy ``csc_matrix``.

    First a 3x3 matrix built from (data, (row, col)) triplets must report 3
    rows and 3 columns. Then a 3x2 matrix built from (data, indices, indptr)
    must compare equal, via ``tm.predictor_equal``, to the DMatrix built from
    its CSR conversion.
    """
    # Triplet-style construction.
    rows = np.array([0, 2, 2, 0, 1, 2])
    cols = np.array([0, 0, 1, 2, 2, 2])
    values = np.array([1, 2, 3, 4, 5, 6])
    triplet_csc = scipy.sparse.csc_matrix((values, (rows, cols)), shape=(3, 3))
    dmat = xgb.DMatrix(triplet_csc)
    n_rows = dmat.num_row()
    assert n_rows == 3
    n_cols = dmat.num_col()
    assert n_cols == 3

    # Compressed-column construction.
    col_ptr = np.array([0, 3, 5])
    compressed_values = np.array([0, 1, 2, 3, 4])
    row_indices = np.array([0, 1, 2, 0, 2])
    compressed_csc = scipy.sparse.csc_matrix(
        (compressed_values, row_indices, col_ptr), shape=(3, 2)
    )
    from_csr = xgb.DMatrix(compressed_csc.tocsr())
    from_csc = xgb.DMatrix(compressed_csc)
    assert tm.predictor_equal(from_csr, from_csc)
|
||||
|
||||
def test_coo(self):
    """A 3x3 scipy ``coo_matrix`` yields a DMatrix with 3 rows and 3 columns."""
    rows = np.array([0, 2, 2, 0, 1, 2])
    cols = np.array([0, 0, 1, 2, 2, 2])
    values = np.array([1, 2, 3, 4, 5, 6])
    sparse = scipy.sparse.coo_matrix((values, (rows, cols)), shape=(3, 3))

    dmat = xgb.DMatrix(sparse)
    n_rows = dmat.num_row()
    assert n_rows == 3
    n_cols = dmat.num_col()
    assert n_cols == 3
|
||||
|
||||
def test_np_view(self):
|
||||
# Sliced Float32 array
|
||||
y = np.array([12, 34, 56], np.float32)[::2]
|
||||
|
||||
87
tests/python/test_with_scipy.py
Normal file
87
tests/python/test_with_scipy.py
Normal file
@ -0,0 +1,87 @@
|
||||
import itertools
|
||||
import warnings
|
||||
from typing import Type
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
import scipy.sparse
|
||||
|
||||
import xgboost as xgb
|
||||
from xgboost import testing as tm
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings("error")
@pytest.mark.parametrize(
    "DMatrixT,CSR",
    list(
        itertools.product(
            (xgb.DMatrix, xgb.QuantileDMatrix),
            (scipy.sparse.csr_matrix, scipy.sparse.csr_array),
        )
    ),
)
def test_csr(DMatrixT: Type[xgb.DMatrix], CSR: Type) -> None:
    """Build ``DMatrixT`` from a 3x3 scipy CSR container of type ``CSR``.

    Parametrized over every pairing of (DMatrix, QuantileDMatrix) with
    (csr_matrix, csr_array). The result must report 3 rows, 3 columns and as
    many non-missing entries as there are stored values.
    """
    with warnings.catch_warnings():
        row_ptr = np.array([0, 2, 3, 6])
        col_indices = np.array([0, 2, 2, 0, 1, 2])
        values = np.array([1, 2, 3, 4, 5, 6])
        sparse = CSR((values, col_indices, row_ptr), shape=(3, 3))

        dmat = DMatrixT(sparse)
        n_rows = dmat.num_row()
        assert n_rows == 3
        n_cols = dmat.num_col()
        assert n_cols == 3
        n_present = dmat.num_nonmissing()
        assert n_present == values.size
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings("error")
@pytest.mark.parametrize(
    "DMatrixT,CSC",
    list(
        itertools.product(
            (xgb.DMatrix, xgb.QuantileDMatrix),
            (scipy.sparse.csc_matrix, scipy.sparse.csc_array),
        )
    ),
)
def test_csc(DMatrixT: Type[xgb.DMatrix], CSC: Type) -> None:
    """Build ``DMatrixT`` from scipy CSC containers of type ``CSC``.

    Parametrized over every pairing of (DMatrix, QuantileDMatrix) with
    (csc_matrix, csc_array). A 3x3 input built from (data, (row, col))
    triplets must report 3 rows, 3 columns and one non-missing entry per
    stored value. A 3x2 input built from (data, indices, indptr) must compare
    equal, via ``tm.predictor_equal``, to the result built from its CSR
    conversion.
    """
    with warnings.catch_warnings():
        # Triplet-style construction.
        rows = np.array([0, 2, 2, 0, 1, 2])
        cols = np.array([0, 0, 1, 2, 2, 2])
        values = np.array([1, 2, 3, 4, 5, 6])
        triplet_csc = CSC((values, (rows, cols)), shape=(3, 3))

        dmat = DMatrixT(triplet_csc)
        n_rows = dmat.num_row()
        assert n_rows == 3
        n_cols = dmat.num_col()
        assert n_cols == 3
        n_present = dmat.num_nonmissing()
        assert n_present == values.size

        # Compressed-column construction.
        col_ptr = np.array([0, 3, 5])
        compressed_values = np.array([0, 1, 2, 3, 4])
        row_indices = np.array([0, 1, 2, 0, 2])
        compressed_csc = CSC((compressed_values, row_indices, col_ptr), shape=(3, 2))

        from_csr = DMatrixT(compressed_csc.tocsr())
        from_csc = DMatrixT(compressed_csc)
        assert tm.predictor_equal(from_csr, from_csc)
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings("error")
@pytest.mark.parametrize(
    "DMatrixT,COO",
    list(
        itertools.product(
            (xgb.DMatrix, xgb.QuantileDMatrix),
            (scipy.sparse.coo_matrix, scipy.sparse.coo_array),
        )
    ),
)
def test_coo(DMatrixT: Type[xgb.DMatrix], COO: Type) -> None:
    """Build ``DMatrixT`` from a 3x3 scipy COO container of type ``COO``.

    Parametrized over every pairing of (DMatrix, QuantileDMatrix) with
    (coo_matrix, coo_array). The result must report 3 rows, 3 columns and one
    non-missing entry per stored value, and must compare equal, via
    ``tm.predictor_equal``, to the result built from the CSR conversion of the
    same input.
    """
    with warnings.catch_warnings():
        rows = np.array([0, 2, 2, 0, 1, 2])
        cols = np.array([0, 0, 1, 2, 2, 2])
        values = np.array([1, 2, 3, 4, 5, 6])
        sparse = COO((values, (rows, cols)), shape=(3, 3))

        dmat = DMatrixT(sparse)
        n_rows = dmat.num_row()
        assert n_rows == 3
        n_cols = dmat.num_col()
        assert n_cols == 3
        n_present = dmat.num_nonmissing()
        assert n_present == values.size

        from_csr = DMatrixT(sparse.tocsr())
        from_coo = DMatrixT(sparse)
        assert tm.predictor_equal(from_csr, from_coo)
|
||||
Loading…
x
Reference in New Issue
Block a user