Support more input types for categorical data. (#7220)

* Support more input types for categorical data.

* Shorten the type name from "categorical" to "c".
* Tests for np/cp array and scipy csr/csc/coo.
* Specify the type for feature info.
This commit is contained in:
Jiaming Yuan
2021-09-16 20:39:30 +08:00
committed by GitHub
parent 2942dc68e4
commit 0ed979b096
11 changed files with 229 additions and 61 deletions

View File

@@ -169,6 +169,19 @@ Arrow specification.'''
X = cp.random.random((n, 2))
xgb.DMatrix(X.toDlpack())
@pytest.mark.skipif(**tm.no_cupy())
def test_cupy_categorical(self):
import cupy as cp
n_features = 10
X, y = tm.make_categorical(10, n_features, n_categories=4, onehot=False)
X = cp.asarray(X.values.astype(cp.float32))
y = cp.array(y)
feature_types = ['c'] * n_features
assert isinstance(X, cp.ndarray)
Xy = xgb.DMatrix(X, y, feature_types=feature_types)
np.testing.assert_equal(np.array(Xy.feature_types), np.array(feature_types))
@pytest.mark.skipif(**tm.no_cupy())
def test_dlpack_device_dmat(self):
import cupy as cp