Obtain CSR matrix from DMatrix. (#8269)
This commit is contained in:
@@ -1,13 +1,14 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
import os
|
||||
import tempfile
|
||||
import numpy as np
|
||||
import xgboost as xgb
|
||||
import scipy.sparse
|
||||
import pytest
|
||||
from scipy.sparse import rand, csr_matrix
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
import scipy.sparse
|
||||
import testing as tm
|
||||
from hypothesis import given, settings, strategies
|
||||
from scipy.sparse import csr_matrix, rand
|
||||
|
||||
import xgboost as xgb
|
||||
|
||||
# Module-level NumPy random state created with a fixed seed of 1.
rng = np.random.RandomState(1)
|
||||
|
||||
@@ -433,3 +434,22 @@ class TestDMatrix:
|
||||
|
||||
def test_base_margin(self) -> None:
    """Run the ``set_base_margin_info`` helper against ``xgb.DMatrix``.

    ``np.asarray`` is handed over as the first argument, the ``DMatrix`` class
    as the second and the string ``"hist"`` as the third.
    """
    array_builder, matrix_cls = np.asarray, xgb.DMatrix
    set_base_margin_info(array_builder, matrix_cls, "hist")
|
||||
|
||||
@given(
    strategies.integers(0, 1000),
    strategies.integers(0, 100),
    strategies.fractions(0, 1),
)
@settings(deadline=None, print_blob=True)
def test_to_csr(self, n_samples, n_features, sparsity) -> None:
    """Check that ``DMatrix.get_data`` hands back the CSR arrays it was built from.

    A 0x0 CSR matrix is used as input when either dimension is zero or the
    drawn sparsity equals 1; otherwise the input comes from
    ``tm.make_sparse_regression`` and is cast to ``float32``.
    """
    use_empty_input = n_samples == 0 or n_features == 0 or sparsity == 1.0
    if use_empty_input:
        expected = scipy.sparse.csr_matrix(np.empty((0, 0)))
    else:
        generated = tm.make_sparse_regression(n_samples, n_features, sparsity, False)
        expected = generated[0].astype(np.float32)

    actual = xgb.DMatrix(data=expected).get_data()

    # Compare the three CSR component arrays one by one.
    for component in ("indptr", "data", "indices"):
        np.testing.assert_equal(
            getattr(expected, component), getattr(actual, component)
        )
|
||||
|
||||
@@ -1,9 +1,16 @@
|
||||
from typing import Dict, List, Any
|
||||
from typing import Any, Dict, List
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
from hypothesis import given, settings, strategies
|
||||
from scipy import sparse
|
||||
from testing import IteratorForTest, make_batches, make_batches_sparse, make_categorical
|
||||
from testing import (
|
||||
IteratorForTest,
|
||||
make_batches,
|
||||
make_batches_sparse,
|
||||
make_categorical,
|
||||
make_sparse_regression,
|
||||
)
|
||||
|
||||
import xgboost as xgb
|
||||
|
||||
@@ -102,6 +109,7 @@ class TestQuantileDMatrix:
|
||||
)
|
||||
if tree_method == "gpu_hist":
|
||||
import cudf
|
||||
|
||||
X = cudf.from_pandas(X)
|
||||
y = cudf.from_pandas(y)
|
||||
else:
|
||||
@@ -154,6 +162,7 @@ class TestQuantileDMatrix:
|
||||
X, y = make_categorical(n_samples, n_features, 13, onehot=False)
|
||||
if tree_method == "gpu_hist":
|
||||
import cudf
|
||||
|
||||
X = cudf.from_pandas(X)
|
||||
y = cudf.from_pandas(y)
|
||||
else:
|
||||
@@ -198,9 +207,7 @@ class TestQuantileDMatrix:
|
||||
|
||||
def test_predict(self) -> None:
|
||||
n_samples, n_features = 16, 2
|
||||
X, y = make_categorical(
|
||||
n_samples, n_features, n_categories=13, onehot=False
|
||||
)
|
||||
X, y = make_categorical(n_samples, n_features, n_categories=13, onehot=False)
|
||||
Xy = xgb.DMatrix(X, y, enable_categorical=True)
|
||||
|
||||
booster = xgb.train({"tree_method": "hist"}, Xy)
|
||||
@@ -210,3 +217,24 @@ class TestQuantileDMatrix:
|
||||
qXy = xgb.QuantileDMatrix(X, y, enable_categorical=True)
|
||||
b = booster.predict(qXy)
|
||||
np.testing.assert_allclose(a, b)
|
||||
|
||||
# We don't test an empty QuantileDMatrix in single-node construction.
|
||||
@given(
    strategies.integers(1, 1000),
    strategies.integers(1, 100),
    strategies.fractions(0, 0.99),
)
@settings(deadline=None, print_blob=True)
def test_to_csr(self, n_samples: int, n_features: int, sparsity: float) -> None:
    """Check the CSR matrix returned by ``QuantileDMatrix.get_data``.

    The returned matrix must share ``indptr`` and ``indices`` with the input,
    and a booster trained on the ``QuantileDMatrix`` must give matching
    predictions for it and for a ``DMatrix`` built from ``get_data()``.
    """
    features, labels = make_sparse_regression(n_samples, n_features, sparsity, False)
    features = features.astype(np.float32)
    quantile_dm = xgb.QuantileDMatrix(data=features, label=labels)

    # NOTE(review): only the sparsity structure is compared; ``data`` is not,
    # presumably because the stored values are quantized -- confirm.
    recovered = quantile_dm.get_data()
    for component in ("indptr", "indices"):
        np.testing.assert_equal(
            getattr(features, component), getattr(recovered, component)
        )

    booster = xgb.train({"tree_method": "hist"}, dtrain=quantile_dm)

    direct_pred = booster.predict(quantile_dm)
    round_trip_pred = booster.predict(xgb.DMatrix(quantile_dm.get_data()))
    np.testing.assert_allclose(direct_pred, round_trip_pred)
|
||||
|
||||
@@ -577,6 +577,8 @@ def make_sparse_regression(
|
||||
|
||||
if as_dense:
|
||||
arr = csr.toarray()
|
||||
assert arr.shape[0] == n_samples
|
||||
assert arr.shape[1] == n_features
|
||||
arr[arr == 0] = np.nan
|
||||
return arr, y
|
||||
|
||||
|
||||
Reference in New Issue
Block a user