Tests for empty dmatrix. (#5159)
This commit is contained in:
parent
298ebe68ac
commit
ced3660f60
@ -6,6 +6,11 @@ import unittest
|
|||||||
if sys.platform.startswith("win"):
|
if sys.platform.startswith("win"):
|
||||||
pytest.skip("Skipping dask tests on Windows", allow_module_level=True)
|
pytest.skip("Skipping dask tests on Windows", allow_module_level=True)
|
||||||
|
|
||||||
|
sys.path.append("tests/python")
|
||||||
|
from test_with_dask import run_empty_dmatrix # noqa
|
||||||
|
from test_with_dask import generate_array # noqa
|
||||||
|
import testing as tm # noqa
|
||||||
|
|
||||||
try:
|
try:
|
||||||
import dask.dataframe as dd
|
import dask.dataframe as dd
|
||||||
from xgboost import dask as dxgb
|
from xgboost import dask as dxgb
|
||||||
@ -15,10 +20,6 @@ try:
|
|||||||
except ImportError:
|
except ImportError:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
sys.path.append("tests/python")
|
|
||||||
from test_with_dask import generate_array # noqa
|
|
||||||
import testing as tm # noqa
|
|
||||||
|
|
||||||
|
|
||||||
class TestDistributedGPU(unittest.TestCase):
|
class TestDistributedGPU(unittest.TestCase):
|
||||||
@pytest.mark.skipif(**tm.no_dask())
|
@pytest.mark.skipif(**tm.no_dask())
|
||||||
@ -52,42 +53,7 @@ class TestDistributedGPU(unittest.TestCase):
|
|||||||
@pytest.mark.skipif(**tm.no_dask_cuda())
|
@pytest.mark.skipif(**tm.no_dask_cuda())
|
||||||
@pytest.mark.mgpu
|
@pytest.mark.mgpu
|
||||||
def test_empty_dmatrix(self):
|
def test_empty_dmatrix(self):
|
||||||
|
|
||||||
def _check_outputs(out, predictions):
|
|
||||||
assert isinstance(out['booster'], dxgb.Booster)
|
|
||||||
assert len(out['history']['validation']['rmse']) == 2
|
|
||||||
assert isinstance(predictions, np.ndarray)
|
|
||||||
assert predictions.shape[0] == 1
|
|
||||||
|
|
||||||
parameters = {'tree_method': 'gpu_hist', 'verbosity': 3,
|
|
||||||
'debug_synchronize': True}
|
|
||||||
|
|
||||||
with LocalCUDACluster() as cluster:
|
with LocalCUDACluster() as cluster:
|
||||||
with Client(cluster) as client:
|
with Client(cluster) as client:
|
||||||
kRows, kCols = 1, 97
|
parameters = {'tree_method': 'gpu_hist'}
|
||||||
X = dd.from_array(np.random.randn(kRows, kCols))
|
run_empty_dmatrix(client, parameters)
|
||||||
y = dd.from_array(np.random.rand(kRows))
|
|
||||||
dtrain = dxgb.DaskDMatrix(client, X, y)
|
|
||||||
|
|
||||||
out = dxgb.train(client, parameters,
|
|
||||||
dtrain=dtrain,
|
|
||||||
evals=[(dtrain, 'validation')],
|
|
||||||
num_boost_round=2)
|
|
||||||
predictions = dxgb.predict(client=client, model=out,
|
|
||||||
data=dtrain).compute()
|
|
||||||
_check_outputs(out, predictions)
|
|
||||||
|
|
||||||
# train has more rows than evals
|
|
||||||
valid = dtrain
|
|
||||||
kRows += 1
|
|
||||||
X = dd.from_array(np.random.randn(kRows, kCols))
|
|
||||||
y = dd.from_array(np.random.rand(kRows))
|
|
||||||
dtrain = dxgb.DaskDMatrix(client, X, y)
|
|
||||||
|
|
||||||
out = dxgb.train(client, parameters,
|
|
||||||
dtrain=dtrain,
|
|
||||||
evals=[(valid, 'validation')],
|
|
||||||
num_boost_round=2)
|
|
||||||
predictions = dxgb.predict(client=client, model=out,
|
|
||||||
data=valid).compute()
|
|
||||||
_check_outputs(out, predictions)
|
|
||||||
|
|||||||
@ -122,3 +122,53 @@ def test_classifier(client):
|
|||||||
|
|
||||||
assert prediction.ndim == 1
|
assert prediction.ndim == 1
|
||||||
assert prediction.shape[0] == kRows
|
assert prediction.shape[0] == kRows
|
||||||
|
|
||||||
|
|
||||||
|
def run_empty_dmatrix(client, parameters):
    """Train and predict through the dask interface on a very small dataset.

    Two scenarios are run with the same ``parameters``:

    1. A 1-row, 97-column training set that is also used for evaluation
       and prediction.
    2. A 2-row training set, evaluated and predicted against the 1-row
       set from the first scenario.

    In both scenarios the result must hold a ``xgb.dask.Booster``, an
    ``rmse`` history with one entry per boosting round (2), and the
    prediction must be a NumPy array with a single row.

    NOTE(review): presumably the data is this small so that some workers
    end up with no rows at all -- confirm against the cluster fixture.

    Parameters
    ----------
    client :
        Dask client passed to every ``xgb.dask`` call.
    parameters : dict
        Booster parameters forwarded unchanged to ``xgb.dask.train``.
    """
    n_features = 97

    def _make_dmatrix(n_rows):
        # Draw features before labels so the random stream is consumed in
        # the same order on every call.
        features = dd.from_array(np.random.randn(n_rows, n_features))
        labels = dd.from_array(np.random.rand(n_rows))
        return xgb.dask.DaskDMatrix(client, features, labels)

    def _train_and_verify(train_set, eval_set):
        result = xgb.dask.train(
            client,
            parameters,
            dtrain=train_set,
            evals=[(eval_set, 'validation')],
            num_boost_round=2,
        )
        scores = xgb.dask.predict(
            client=client, model=result, data=eval_set
        ).compute()

        assert isinstance(result['booster'], xgb.dask.Booster)
        assert len(result['history']['validation']['rmse']) == 2
        assert isinstance(scores, np.ndarray)
        assert scores.shape[0] == 1

    single_row = _make_dmatrix(1)
    _train_and_verify(single_row, single_row)

    # train has more rows than evals
    two_rows = _make_dmatrix(2)
    _train_and_verify(two_rows, single_row)
|
||||||
|
|
||||||
|
|
||||||
|
# No test for Exact, as empty DMatrix handling is mostly for distributed
|
||||||
|
# environment and Exact doesn't support it.
|
||||||
|
|
||||||
|
def test_empty_dmatrix_hist(client):
    """Run the shared tiny-dataset scenario with ``tree_method='hist'``."""
    run_empty_dmatrix(client, {'tree_method': 'hist'})
|
||||||
|
|
||||||
|
|
||||||
|
def test_empty_dmatrix_approx(client):
    """Run the shared tiny-dataset scenario with ``tree_method='approx'``."""
    run_empty_dmatrix(client, {'tree_method': 'approx'})
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user