Use Booster context in DMatrix. (#8896)

- Pass context from booster to DMatrix.
- Use context instead of integer for `n_threads`.
- Check the consistency configuration for `max_bin`.
- Test for all combinations of initialization options.
This commit is contained in:
Jiaming Yuan
2023-04-28 21:47:14 +08:00
committed by GitHub
parent 1f9a57d17b
commit 08ce495b5d
67 changed files with 1283 additions and 935 deletions

View File

@@ -18,6 +18,7 @@ class TestQuantileDMatrix:
@pytest.mark.skipif(**tm.no_cupy())
def test_dmatrix_feature_weights(self) -> None:
import cupy as cp
rng = cp.random.RandomState(1994)
data = rng.randn(5, 5)
m = xgb.DMatrix(data)
@@ -26,23 +27,91 @@ class TestQuantileDMatrix:
m.set_info(feature_weights=feature_weights)
cp.testing.assert_array_equal(
cp.array(m.get_float_info('feature_weights')),
feature_weights.astype(np.float32))
cp.array(m.get_float_info("feature_weights")),
feature_weights.astype(np.float32),
)
@pytest.mark.skipif(**tm.no_cupy())
def test_dmatrix_cupy_init(self) -> None:
import cupy as cp
data = cp.random.randn(5, 5)
xgb.QuantileDMatrix(data, cp.ones(5, dtype=np.float64))
@pytest.mark.parametrize(
"on_device,tree_method",
[(True, "hist"), (False, "gpu_hist"), (False, "hist"), (True, "gpu_hist")],
)
def test_initialization(self, on_device: bool, tree_method: str) -> None:
n_samples, n_features, max_bin = 64, 3, 16
X, y, w = tm.make_batches(
n_samples,
n_features=n_features,
n_batches=1,
use_cupy=on_device,
)
# Init SparsePage
Xy = xgb.DMatrix(X[0], y[0], weight=w[0])
# Init GIDX/Ellpack
xgb.train(
{"tree_method": tree_method, "max_bin": max_bin},
Xy,
num_boost_round=1,
)
# query cuts from GIDX/Ellpack
qXy = xgb.QuantileDMatrix(X[0], y[0], weight=w[0], max_bin=max_bin, ref=Xy)
tm.predictor_equal(Xy, qXy)
with pytest.raises(ValueError, match="Inconsistent"):
# max_bin changed.
xgb.QuantileDMatrix(X[0], y[0], weight=w[0], max_bin=max_bin - 1, ref=Xy)
# No error, DMatrix can be modified for different training session.
xgb.train(
{"tree_method": tree_method, "max_bin": max_bin - 1},
Xy,
num_boost_round=1,
)
# Init Ellpack/GIDX
Xy = xgb.QuantileDMatrix(X[0], y[0], weight=w[0], max_bin=max_bin)
# Init GIDX/Ellpack
xgb.train(
{"tree_method": tree_method, "max_bin": max_bin},
Xy,
num_boost_round=1,
)
# query cuts from GIDX/Ellpack
qXy = xgb.QuantileDMatrix(X[0], y[0], weight=w[0], max_bin=max_bin, ref=Xy)
tm.predictor_equal(Xy, qXy)
with pytest.raises(ValueError, match="Inconsistent"):
# max_bin changed.
xgb.QuantileDMatrix(X[0], y[0], weight=w[0], max_bin=max_bin - 1, ref=Xy)
Xy = xgb.DMatrix(X[0], y[0], weight=w[0])
booster0 = xgb.train(
{"tree_method": "hist", "max_bin": max_bin, "max_depth": 4},
Xy,
num_boost_round=1,
)
booster1 = xgb.train(
{"tree_method": "gpu_hist", "max_bin": max_bin, "max_depth": 4},
Xy,
num_boost_round=1,
)
qXy = xgb.QuantileDMatrix(X[0], y[0], weight=w[0], max_bin=max_bin, ref=Xy)
predt0 = booster0.predict(qXy)
predt1 = booster1.predict(qXy)
np.testing.assert_allclose(predt0, predt1)
@pytest.mark.skipif(**tm.no_cupy())
@pytest.mark.parametrize(
"tree_method,max_bin", [
("hist", 16), ("gpu_hist", 16), ("hist", 64), ("gpu_hist", 64)
]
"tree_method,max_bin",
[("hist", 16), ("gpu_hist", 16), ("hist", 64), ("gpu_hist", 64)],
)
def test_interoperability(self, tree_method: str, max_bin: int) -> None:
import cupy as cp
n_samples = 64
n_features = 3
X, y, w = tm.make_batches(
@@ -75,6 +144,7 @@ class TestQuantileDMatrix:
@pytest.mark.skipif(**tm.no_cupy())
def test_metainfo(self) -> None:
import cupy as cp
rng = cp.random.RandomState(1994)
rows = 10
@@ -98,6 +168,7 @@ class TestQuantileDMatrix:
@pytest.mark.skipif(**tm.no_cudf())
def test_ref_dmatrix(self) -> None:
import cupy as cp
rng = cp.random.RandomState(1994)
self.cputest.run_ref_dmatrix(rng, "gpu_hist", False)
@@ -158,5 +229,6 @@ class TestQuantileDMatrix:
@pytest.mark.skipif(**tm.no_cupy())
def test_check_inf(self) -> None:
import cupy as cp
rng = cp.random.default_rng(1994)
check_inf(rng)

View File

@@ -153,12 +153,18 @@ class TestGPUUpdaters:
tm.dataset_strategy
)
@settings(deadline=None, max_examples=20, print_blob=True)
def test_gpu_hist_device_dmatrix(self, param, num_rounds, dataset):
def test_gpu_hist_device_dmatrix(
self, param: dict, num_rounds: int, dataset: tm.TestDataset
) -> None:
# We cannot handle empty dataset yet
assume(len(dataset.y) > 0)
param['tree_method'] = 'gpu_hist'
param = dataset.set_params(param)
result = train_result(param, dataset.get_device_dmat(), num_rounds)
result = train_result(
param,
dataset.get_device_dmat(max_bin=param.get("max_bin", None)),
num_rounds
)
note(result)
assert tm.non_increasing(result['train'][dataset.metric], tolerance=1e-3)