Run training with empty DMatrix. (#4990)

This makes GPU Hist robust in distributed environment as some workers might not
be associated with any data in either training or evaluation.

* Disable rabit mock test for now: See #5012 .

* Disable dask-cudf test at prediction for now: See #5003

* Launch dask job for all workers despite they might not have any data.
* Check 0 rows in elementwise evaluation metrics.

   Using AUC and AUC-PR still throws an error.  See #4663 for a robust fix.

* Add tests for edge cases.
* Add `LaunchKernel` wrapper handling zero sized grid.
* Move some parts of allreducer into a cu file.
* Don't validate feature names when the booster is empty.

* Sync number of columns in DMatrix.

  As num_feature is required to be the same across all workers in data split
  mode.

* Filtering in dask interface now by default syncs all booster that's not
empty, instead of using rank 0.

* Fix Jenkins' GPU tests.

* Install dask-cuda from source in Jenkins' test.

  Now all tests are actually running.

* Restore GPU Hist tree synchronization test.

* Check UUID of running devices.

  The check is only performed on CUDA version >= 10.x, as 9.x doesn't have UUID field.

* Fix CMake policy and project variables.

  Use xgboost_SOURCE_DIR uniformly, add policy for CMake >= 3.13.

* Fix copying data to CPU

* Fix race condition in cpu predictor.

* Fix duplicated DMatrix construction.

* Don't download extra nccl in CI script.
This commit is contained in:
Jiaming Yuan
2019-11-06 16:13:13 +08:00
committed by GitHub
parent 807a244517
commit 7663de956c
44 changed files with 603 additions and 272 deletions

View File

@@ -2,6 +2,7 @@ import numpy as np
import sys
import unittest
import pytest
import xgboost
sys.path.append("tests/python")
from regression_test_utilities import run_suite, parameter_combinations, \
@@ -21,7 +22,8 @@ datasets = ["Boston", "Cancer", "Digits", "Sparse regression",
class TestGPU(unittest.TestCase):
def test_gpu_hist(self):
test_param = parameter_combinations({'gpu_id': [0], 'max_depth': [2, 8],
test_param = parameter_combinations({'gpu_id': [0],
'max_depth': [2, 8],
'max_leaves': [255, 4],
'max_bin': [2, 256],
'grow_policy': ['lossguide']})
@@ -36,6 +38,31 @@ class TestGPU(unittest.TestCase):
cpu_results = run_suite(param, select_datasets=datasets)
assert_gpu_results(cpu_results, gpu_results)
def test_with_empty_dmatrix(self):
# FIXME(trivialfis): This should be done with all updaters
kRows = 0
kCols = 100
X = np.empty((kRows, kCols))
y = np.empty((kRows))
dtrain = xgboost.DMatrix(X, y)
bst = xgboost.train({'verbosity': 2,
'tree_method': 'gpu_hist',
'gpu_id': 0},
dtrain,
verbose_eval=True,
num_boost_round=6,
evals=[(dtrain, 'Train')])
kRows = 100
X = np.random.randn(kRows, kCols)
dtest = xgboost.DMatrix(X)
predictions = bst.predict(dtest)
np.testing.assert_allclose(predictions, 0.5, 1e-6)
@pytest.mark.mgpu
def test_specified_gpu_id_gpu_update(self):
variable_param = {'gpu_id': [1],

View File

@@ -1,45 +1,94 @@
import sys
import pytest
import numpy as np
import unittest
if sys.platform.startswith("win"):
pytest.skip("Skipping dask tests on Windows", allow_module_level=True)
try:
from distributed.utils_test import client, loop, cluster_fixture
import dask.dataframe as dd
from xgboost import dask as dxgb
from dask_cuda import LocalCUDACluster
from dask.distributed import Client
import cudf
except ImportError:
client = None
loop = None
cluster_fixture = None
pass
sys.path.append("tests/python")
from test_with_dask import generate_array
import testing as tm
from test_with_dask import generate_array # noqa
import testing as tm # noqa
@pytest.mark.skipif(**tm.no_dask())
@pytest.mark.skipif(**tm.no_cudf())
@pytest.mark.skipif(**tm.no_dask_cudf())
def test_dask_dataframe(client):
X, y = generate_array()
class TestDistributedGPU(unittest.TestCase):
@pytest.mark.skipif(**tm.no_dask())
@pytest.mark.skipif(**tm.no_cudf())
@pytest.mark.skipif(**tm.no_dask_cudf())
@pytest.mark.skipif(**tm.no_dask_cuda())
def test_dask_dataframe(self):
with LocalCUDACluster() as cluster:
with Client(cluster) as client:
X, y = generate_array()
X = dd.from_dask_array(X)
y = dd.from_dask_array(y)
X = dd.from_dask_array(X)
y = dd.from_dask_array(y)
X = X.map_partitions(cudf.from_pandas)
y = y.map_partitions(cudf.from_pandas)
X = X.map_partitions(cudf.from_pandas)
y = y.map_partitions(cudf.from_pandas)
dtrain = dxgb.DaskDMatrix(client, X, y)
out = dxgb.train(client, {'tree_method': 'gpu_hist'},
dtrain=dtrain,
evals=[(dtrain, 'X')],
num_boost_round=2)
dtrain = dxgb.DaskDMatrix(client, X, y)
out = dxgb.train(client, {'tree_method': 'gpu_hist'},
dtrain=dtrain,
evals=[(dtrain, 'X')],
num_boost_round=2)
assert isinstance(out['booster'], dxgb.Booster)
assert len(out['history']['X']['rmse']) == 2
assert isinstance(out['booster'], dxgb.Booster)
assert len(out['history']['X']['rmse']) == 2
predictions = dxgb.predict(out, dtrain)
predictions = predictions.compute()
# FIXME(trivialfis): Re-enable this after #5003 is fixed
# predictions = dxgb.predict(client, out, dtrain).compute()
# assert isinstance(predictions, np.ndarray)
@pytest.mark.skipif(**tm.no_dask())
@pytest.mark.skipif(**tm.no_dask_cuda())
@pytest.mark.mgpu
def test_empty_dmatrix(self):
def _check_outputs(out, predictions):
assert isinstance(out['booster'], dxgb.Booster)
assert len(out['history']['validation']['rmse']) == 2
assert isinstance(predictions, np.ndarray)
assert predictions.shape[0] == 1
parameters = {'tree_method': 'gpu_hist', 'verbosity': 3,
'debug_synchronize': True}
with LocalCUDACluster() as cluster:
with Client(cluster) as client:
kRows, kCols = 1, 97
X = dd.from_array(np.random.randn(kRows, kCols))
y = dd.from_array(np.random.rand(kRows))
dtrain = dxgb.DaskDMatrix(client, X, y)
out = dxgb.train(client, parameters,
dtrain=dtrain,
evals=[(dtrain, 'validation')],
num_boost_round=2)
predictions = dxgb.predict(client=client, model=out,
data=dtrain).compute()
_check_outputs(out, predictions)
# train has more rows than evals
valid = dtrain
kRows += 1
X = dd.from_array(np.random.randn(kRows, kCols))
y = dd.from_array(np.random.rand(kRows))
dtrain = dxgb.DaskDMatrix(client, X, y)
out = dxgb.train(client, parameters,
dtrain=dtrain,
evals=[(valid, 'validation')],
num_boost_round=2)
predictions = dxgb.predict(client=client, model=out,
data=valid).compute()
_check_outputs(out, predictions)