Tests for empty dmatrix. (#5159)
This commit is contained in:
parent
298ebe68ac
commit
ced3660f60
@ -6,6 +6,11 @@ import unittest
|
||||
if sys.platform.startswith("win"):
|
||||
pytest.skip("Skipping dask tests on Windows", allow_module_level=True)
|
||||
|
||||
sys.path.append("tests/python")
|
||||
from test_with_dask import run_empty_dmatrix # noqa
|
||||
from test_with_dask import generate_array # noqa
|
||||
import testing as tm # noqa
|
||||
|
||||
try:
|
||||
import dask.dataframe as dd
|
||||
from xgboost import dask as dxgb
|
||||
@ -15,10 +20,6 @@ try:
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
sys.path.append("tests/python")
|
||||
from test_with_dask import generate_array # noqa
|
||||
import testing as tm # noqa
|
||||
|
||||
|
||||
class TestDistributedGPU(unittest.TestCase):
|
||||
@pytest.mark.skipif(**tm.no_dask())
|
||||
@ -52,42 +53,7 @@ class TestDistributedGPU(unittest.TestCase):
|
||||
@pytest.mark.skipif(**tm.no_dask_cuda())
|
||||
@pytest.mark.mgpu
|
||||
def test_empty_dmatrix(self):
|
||||
|
||||
def _check_outputs(out, predictions):
|
||||
assert isinstance(out['booster'], dxgb.Booster)
|
||||
assert len(out['history']['validation']['rmse']) == 2
|
||||
assert isinstance(predictions, np.ndarray)
|
||||
assert predictions.shape[0] == 1
|
||||
|
||||
parameters = {'tree_method': 'gpu_hist', 'verbosity': 3,
|
||||
'debug_synchronize': True}
|
||||
|
||||
with LocalCUDACluster() as cluster:
|
||||
with Client(cluster) as client:
|
||||
kRows, kCols = 1, 97
|
||||
X = dd.from_array(np.random.randn(kRows, kCols))
|
||||
y = dd.from_array(np.random.rand(kRows))
|
||||
dtrain = dxgb.DaskDMatrix(client, X, y)
|
||||
|
||||
out = dxgb.train(client, parameters,
|
||||
dtrain=dtrain,
|
||||
evals=[(dtrain, 'validation')],
|
||||
num_boost_round=2)
|
||||
predictions = dxgb.predict(client=client, model=out,
|
||||
data=dtrain).compute()
|
||||
_check_outputs(out, predictions)
|
||||
|
||||
# train has more rows than evals
|
||||
valid = dtrain
|
||||
kRows += 1
|
||||
X = dd.from_array(np.random.randn(kRows, kCols))
|
||||
y = dd.from_array(np.random.rand(kRows))
|
||||
dtrain = dxgb.DaskDMatrix(client, X, y)
|
||||
|
||||
out = dxgb.train(client, parameters,
|
||||
dtrain=dtrain,
|
||||
evals=[(valid, 'validation')],
|
||||
num_boost_round=2)
|
||||
predictions = dxgb.predict(client=client, model=out,
|
||||
data=valid).compute()
|
||||
_check_outputs(out, predictions)
|
||||
parameters = {'tree_method': 'gpu_hist'}
|
||||
run_empty_dmatrix(client, parameters)
|
||||
|
||||
@ -122,3 +122,53 @@ def test_classifier(client):
|
||||
|
||||
assert prediction.ndim == 1
|
||||
assert prediction.shape[0] == kRows
|
||||
|
||||
|
||||
def run_empty_dmatrix(client, parameters):
    """Train and predict through ``xgb.dask`` on very small random inputs.

    Two scenarios are exercised with the supplied ``parameters``:

    1. a single-row training set that is also used as the validation set;
    2. a two-row training set validated against the single-row set above.

    In both scenarios two boosting rounds are run, predictions are computed
    on the validation data, and the outputs are checked.

    NOTE(review): presumably the row counts are kept this small so that some
    workers receive no rows at all -- confirm against the cluster fixture.

    Parameters
    ----------
    client :
        Dask client handed to ``xgb.dask`` for constructing the data,
        training and prediction.
    parameters : dict
        Booster parameters forwarded unchanged to ``xgb.dask.train``.
    """
    n_features = 97

    def _make_dmatrix(n_rows):
        # Features are drawn before labels, as two separate random calls.
        features = dd.from_array(np.random.randn(n_rows, n_features))
        labels = dd.from_array(np.random.rand(n_rows))
        return xgb.dask.DaskDMatrix(client, features, labels)

    def _fit_and_verify(train_set, eval_set):
        result = xgb.dask.train(
            client,
            parameters,
            dtrain=train_set,
            evals=[(eval_set, 'validation')],
            num_boost_round=2,
        )
        lazy_predictions = xgb.dask.predict(
            client=client, model=result, data=eval_set)
        predictions = lazy_predictions.compute()

        assert isinstance(result['booster'], xgb.dask.Booster)
        # One rmse entry is expected per boosting round.
        assert len(result['history']['validation']['rmse']) == 2
        assert isinstance(predictions, np.ndarray)
        # The evaluation data always holds exactly one row.
        assert predictions.shape[0] == 1

    single_row = _make_dmatrix(1)
    _fit_and_verify(single_row, single_row)

    # train has more rows than evals
    two_rows = _make_dmatrix(2)
    _fit_and_verify(two_rows, single_row)
|
||||
|
||||
|
||||
# No test for Exact, as empty DMatrix handling is mostly for distributed
|
||||
# environment and Exact doesn't support it.
|
||||
|
||||
def test_empty_dmatrix_hist(client):
    """Run the shared empty-DMatrix scenario with ``tree_method='hist'``."""
    run_empty_dmatrix(client, {'tree_method': 'hist'})
|
||||
|
||||
|
||||
def test_empty_dmatrix_approx(client):
    """Run the shared empty-DMatrix scenario with ``tree_method='approx'``."""
    run_empty_dmatrix(client, {'tree_method': 'approx'})
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user