Obtain CSR matrix from DMatrix. (#8269)
This commit is contained in:
@@ -125,6 +125,7 @@ if __name__ == "__main__":
|
||||
# tests
|
||||
"tests/python/test_config.py",
|
||||
"tests/python/test_spark/",
|
||||
"tests/python/test_quantile_dmatrix.py",
|
||||
"tests/python-gpu/test_gpu_spark/",
|
||||
"tests/ci_build/lint_python.py",
|
||||
# demo
|
||||
|
||||
@@ -2,6 +2,8 @@ import numpy as np
|
||||
import xgboost as xgb
|
||||
import pytest
|
||||
import sys
|
||||
from hypothesis import given, strategies, settings
|
||||
from scipy import sparse
|
||||
|
||||
sys.path.append("tests/python")
|
||||
import testing as tm
|
||||
@@ -96,3 +98,42 @@ class TestDeviceQuantileDMatrix:
|
||||
import cupy as cp
|
||||
rng = cp.random.RandomState(1994)
|
||||
self.cputest.run_ref_dmatrix(rng, "gpu_hist", False)
|
||||
|
||||
@given(
    strategies.integers(1, 1000),
    strategies.integers(1, 100),
    strategies.fractions(0, 0.99),
)
@settings(print_blob=True, deadline=None)
def test_to_csr(self, n_samples, n_features, sparsity) -> None:
    """Check CSR extraction from a QuantileDMatrix built on device data.

    The sparsity pattern (``indptr`` / ``indices``) returned by
    ``get_data()`` for the cupy-backed matrix is compared against both the
    generated scipy CSR input and the pattern returned by a host-built
    QuantileDMatrix.  Finally, predictions on the device matrix must match
    predictions on a DMatrix rebuilt from the extracted CSR.
    """
    import cupy as cp

    X, y = tm.make_sparse_regression(n_samples, n_features, sparsity, False)
    # Reference sparse layout; only its indptr/indices are compared below.
    csr = X.astype(np.float32)

    # Dense copy with zeros replaced by NaN
    # (presumably so they are treated as missing -- TODO confirm).
    dense = X.toarray()
    dense[dense == 0] = np.nan

    # Host-side construction (no label, as in the device/host comparison).
    host_matrix = xgb.QuantileDMatrix(data=dense)
    h_ret = host_matrix.get_data()

    # Device-side construction from the same dense values.
    device_input = cp.array(dense)
    d_m = xgb.QuantileDMatrix(data=device_input, label=y)
    d_ret = d_m.get_data()

    # Structure must agree with the input CSR first, then with the host result.
    for reference in (csr, h_ret):
        for field in ("indptr", "indices"):
            np.testing.assert_equal(
                getattr(reference, field), getattr(d_ret, field)
            )

    booster = xgb.train({"tree_method": "gpu_hist"}, dtrain=d_m)

    direct = booster.predict(d_m)
    round_trip = booster.predict(xgb.DMatrix(d_m.get_data()))
    np.testing.assert_allclose(direct, round_trip, atol=1e-6)
|
||||
|
||||
@@ -1,13 +1,14 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
import os
|
||||
import tempfile
|
||||
import numpy as np
|
||||
import xgboost as xgb
|
||||
import scipy.sparse
|
||||
import pytest
|
||||
from scipy.sparse import rand, csr_matrix
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
import scipy.sparse
|
||||
import testing as tm
|
||||
from hypothesis import given, settings, strategies
|
||||
from scipy.sparse import csr_matrix, rand
|
||||
|
||||
import xgboost as xgb
|
||||
|
||||
rng = np.random.RandomState(1)
|
||||
|
||||
@@ -433,3 +434,22 @@ class TestDMatrix:
|
||||
|
||||
def test_base_margin(self):
|
||||
set_base_margin_info(np.asarray, xgb.DMatrix, "hist")
|
||||
|
||||
@given(
    strategies.integers(0, 1000),
    strategies.integers(0, 100),
    strategies.fractions(0, 1),
)
@settings(deadline=None, print_blob=True)
def test_to_csr(self, n_samples, n_features, sparsity) -> None:
    """Round-trip a scipy CSR matrix through ``xgb.DMatrix.get_data()``.

    Degenerate draws (no rows, no columns, or a sparsity of exactly 1)
    are replaced by an empty 0x0 CSR matrix; every other draw uses the
    float32 cast of the matrix from ``tm.make_sparse_regression``.  The
    ``indptr``, ``data`` and ``indices`` arrays of the result must equal
    those of the input.
    """
    degenerate = n_samples == 0 or n_features == 0 or sparsity == 1.0
    if not degenerate:
        generated = tm.make_sparse_regression(
            n_samples, n_features, sparsity, False
        )
        csr = generated[0].astype(np.float32)
    else:
        csr = scipy.sparse.csr_matrix(np.empty((0, 0)))

    dmatrix = xgb.DMatrix(data=csr)
    ret = dmatrix.get_data()

    # Compare the three CSR component arrays in turn.
    for field in ("indptr", "data", "indices"):
        np.testing.assert_equal(getattr(csr, field), getattr(ret, field))
|
||||
|
||||
@@ -1,9 +1,16 @@
|
||||
from typing import Dict, List, Any
|
||||
from typing import Any, Dict, List
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
from hypothesis import given, settings, strategies
|
||||
from scipy import sparse
|
||||
from testing import IteratorForTest, make_batches, make_batches_sparse, make_categorical
|
||||
from testing import (
|
||||
IteratorForTest,
|
||||
make_batches,
|
||||
make_batches_sparse,
|
||||
make_categorical,
|
||||
make_sparse_regression,
|
||||
)
|
||||
|
||||
import xgboost as xgb
|
||||
|
||||
@@ -102,6 +109,7 @@ class TestQuantileDMatrix:
|
||||
)
|
||||
if tree_method == "gpu_hist":
|
||||
import cudf
|
||||
|
||||
X = cudf.from_pandas(X)
|
||||
y = cudf.from_pandas(y)
|
||||
else:
|
||||
@@ -154,6 +162,7 @@ class TestQuantileDMatrix:
|
||||
X, y = make_categorical(n_samples, n_features, 13, onehot=False)
|
||||
if tree_method == "gpu_hist":
|
||||
import cudf
|
||||
|
||||
X = cudf.from_pandas(X)
|
||||
y = cudf.from_pandas(y)
|
||||
else:
|
||||
@@ -198,9 +207,7 @@ class TestQuantileDMatrix:
|
||||
|
||||
def test_predict(self) -> None:
|
||||
n_samples, n_features = 16, 2
|
||||
X, y = make_categorical(
|
||||
n_samples, n_features, n_categories=13, onehot=False
|
||||
)
|
||||
X, y = make_categorical(n_samples, n_features, n_categories=13, onehot=False)
|
||||
Xy = xgb.DMatrix(X, y, enable_categorical=True)
|
||||
|
||||
booster = xgb.train({"tree_method": "hist"}, Xy)
|
||||
@@ -210,3 +217,24 @@ class TestQuantileDMatrix:
|
||||
qXy = xgb.QuantileDMatrix(X, y, enable_categorical=True)
|
||||
b = booster.predict(qXy)
|
||||
np.testing.assert_allclose(a, b)
|
||||
|
||||
# we don't test empty Quantile DMatrix in single node construction.
|
||||
@given(
    strategies.integers(1, 1000),
    strategies.integers(1, 100),
    strategies.fractions(0, 0.99),
)
@settings(deadline=None, print_blob=True)
def test_to_csr(self, n_samples: int, n_features: int, sparsity: float) -> None:
    """Check CSR extraction from a QuantileDMatrix built from sparse input.

    The ``indptr`` and ``indices`` arrays returned by ``get_data()`` must
    equal those of the float32 input matrix, and a booster trained on the
    QuantileDMatrix must give the same predictions on it as on a DMatrix
    rebuilt from the extracted CSR.
    """
    generated, y = make_sparse_regression(n_samples, n_features, sparsity, False)
    csr = generated.astype(np.float32)

    qdm = xgb.QuantileDMatrix(data=csr, label=y)
    ret = qdm.get_data()

    # Only the sparsity structure is compared; the stored values are not.
    for field in ("indptr", "indices"):
        np.testing.assert_equal(getattr(csr, field), getattr(ret, field))

    booster = xgb.train({"tree_method": "hist"}, dtrain=qdm)

    direct = booster.predict(qdm)
    round_trip = booster.predict(xgb.DMatrix(qdm.get_data()))
    np.testing.assert_allclose(direct, round_trip)
|
||||
|
||||
@@ -577,6 +577,8 @@ def make_sparse_regression(
|
||||
|
||||
if as_dense:
|
||||
arr = csr.toarray()
|
||||
assert arr.shape[0] == n_samples
|
||||
assert arr.shape[1] == n_features
|
||||
arr[arr == 0] = np.nan
|
||||
return arr, y
|
||||
|
||||
|
||||
Reference in New Issue
Block a user