Run training with empty DMatrix. (#4990)
This makes GPU Hist robust in distributed environments, as some workers might not be associated with any data during either training or evaluation.

* Disable the rabit mock test for now: see #5012.
* Disable the dask-cudf prediction test for now: see #5003.
* Launch the dask job on all workers, even those that might not have any data (a sketch of this scenario follows below).
* Check for 0 rows in elementwise evaluation metrics (see the second sketch below). Using AUC and AUC-PR still throws an error; see #4663 for a robust fix.
* Add tests for edge cases.
* Add a `LaunchKernel` wrapper that handles zero-sized grids.
* Move some parts of the allreducer into a .cu file.
* Don't validate feature names when the booster is empty.
* Sync the number of columns in the DMatrix, as num_feature is required to be the same across all workers in data-split mode.
* Filtering in the dask interface now by default syncs all boosters that are not empty, instead of using rank 0 (a sketch follows after the diff).
* Fix Jenkins' GPU tests.
* Install dask-cuda from source in Jenkins' tests; now all tests are actually running.
* Restore the GPU Hist tree synchronization test.
* Check the UUID of running devices. The check is only performed on CUDA versions >= 10.x, as 9.x doesn't have the UUID field.
* Fix CMake policy and project variables: use xgboost_SOURCE_DIR uniformly, and add a policy for CMake >= 3.13.
* Fix copying data to CPU.
* Fix a race condition in the CPU predictor.
* Fix duplicated DMatrix construction.
* Don't download an extra NCCL in the CI script.
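The first bullet above describes the core scenario: in a distributed job, a worker can legitimately hold an empty shard. A minimal sketch of that setup, assuming the xgboost.dask interface (DaskDMatrix and train, as found in later releases; the exact API of this commit's era may differ) and an already-running cluster client:

    import numpy as np
    import dask.array as da
    import xgboost
    from xgboost import dask as dxgb
    from dask.distributed import Client

    def train_on_cluster(client: Client) -> xgboost.Booster:
        # All 64 rows live in a single chunk, so with more than one worker
        # every other worker receives an empty shard, yet each of them
        # still joins training and the collective allreduce calls.
        X = da.from_array(np.random.randn(64, 10), chunks=(64, 10))
        y = da.from_array(np.random.randn(64), chunks=(64,))
        dtrain = dxgb.DaskDMatrix(client, X, y)
        output = dxgb.train(client, {'tree_method': 'gpu_hist'},
                            dtrain, num_boost_round=4,
                            evals=[(dtrain, 'train')])
        return output['booster']

The point of this commit is that workers with no data now train against an empty DMatrix rather than failing.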
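The zero-row check for elementwise metrics lives in C++; the sketch below is only a Python analogue with illustrative names (distributed_rmse and allreduce_sum are not XGBoost API). The idea is to reduce a (sum, count) pair globally before dividing, so a worker with zero rows contributes (0, 0) instead of producing a NaN:

    import numpy as np

    def distributed_rmse(local_pred, local_label, allreduce_sum):
        # Local partial sums; both are 0.0 on a worker with an empty shard.
        residual = float(np.sum((local_pred - local_label) ** 2))
        count = float(local_label.shape[0])
        # allreduce_sum stands in for rabit's sum-allreduce across workers.
        total_residual, total_count = allreduce_sum([residual, count])
        # Divide only after the global reduction, so every worker computes
        # the same value even if its own shard was empty.
        return float(np.sqrt(total_residual / total_count))

    # Single-process stand-in for the reduction, for illustration:
    rmse = distributed_rmse(np.zeros(4), np.ones(4), lambda values: values)

Ranking metrics such as AUC and AUC-PR cannot be handled this way and still throw an error, which is why the commit message defers them to #4663. The diff below adds the empty-DMatrix edge-case test to the GPU updater test suite.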
@@ -2,6 +2,7 @@ import numpy as np
 import sys
 import unittest
+import pytest
 import xgboost
 
 sys.path.append("tests/python")
 from regression_test_utilities import run_suite, parameter_combinations, \
@@ -21,7 +22,8 @@ datasets = ["Boston", "Cancer", "Digits", "Sparse regression",
 
 class TestGPU(unittest.TestCase):
     def test_gpu_hist(self):
-        test_param = parameter_combinations({'gpu_id': [0], 'max_depth': [2, 8],
+        test_param = parameter_combinations({'gpu_id': [0],
+                                             'max_depth': [2, 8],
                                              'max_leaves': [255, 4],
                                              'max_bin': [2, 256],
                                              'grow_policy': ['lossguide']})
@@ -36,6 +38,31 @@ class TestGPU(unittest.TestCase):
         cpu_results = run_suite(param, select_datasets=datasets)
         assert_gpu_results(cpu_results, gpu_results)
 
+    def test_with_empty_dmatrix(self):
+        # FIXME(trivialfis): This should be done with all updaters
+        kRows = 0
+        kCols = 100
+
+        X = np.empty((kRows, kCols))
+        y = np.empty((kRows))
+
+        dtrain = xgboost.DMatrix(X, y)
+
+        bst = xgboost.train({'verbosity': 2,
+                             'tree_method': 'gpu_hist',
+                             'gpu_id': 0},
+                            dtrain,
+                            verbose_eval=True,
+                            num_boost_round=6,
+                            evals=[(dtrain, 'Train')])
+
+        kRows = 100
+        X = np.random.randn(kRows, kCols)
+
+        dtest = xgboost.DMatrix(X)
+        predictions = bst.predict(dtest)
+        np.testing.assert_allclose(predictions, 0.5, 1e-6)
+
+    @pytest.mark.mgpu
     def test_specified_gpu_id_gpu_update(self):
         variable_param = {'gpu_id': [1],
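For the dask-side filtering mentioned in the commit message ("syncs all boosters that are not empty"), a hedged sketch of the idea; the helper below is hypothetical and the real selection logic in the dask interface may differ. Rather than always taking rank 0's model, gather the workers' boosters and prefer one that actually contains trees:

    import xgboost

    def first_non_empty(boosters):
        # A worker that saw no data can hand back a booster without any
        # trees; prefer a booster that actually grew some.
        for bst in boosters:
            if len(bst.get_dump()) > 0:
                return bst
        # Every worker was empty: fall back to the first (rank 0) booster.
        return boosters[0]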
||||