xgboost/tests/python-gpu/test_gpu_updaters.py
Jiaming Yuan 7663de956c
Run training with empty DMatrix. (#4990)
This makes GPU Hist robust in a distributed environment, as some workers might
not be associated with any data during either training or evaluation.
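
A rough sketch of the scenario this fixes; the dask-side names below
(LocalCUDACluster, the in-tree dask module) are illustrative, not part of
this change:

    from dask.distributed import Client
    from dask_cuda import LocalCUDACluster
    import dask.array as da
    from xgboost import dask as dxgb

    with LocalCUDACluster(n_workers=2) as cluster, Client(cluster) as client:
        # A single 100-row chunk: one of the two workers gets no partition,
        # so its local DMatrix is empty.
        X = da.random.random((100, 10), chunks=(100, 10))
        y = da.random.random(100, chunks=100)
        dtrain = dxgb.DaskDMatrix(client, X, y)
        # Training must now succeed on the data-less worker as well.
        dxgb.train(client, {'tree_method': 'gpu_hist'}, dtrain,
                   num_boost_round=4)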

* Disable the rabit mock test for now; see #5012.

* Disable the dask-cudf prediction test for now; see #5003.

* Launch dask jobs on all workers, even those that might not have any data.
* Check for 0 rows in elementwise evaluation metrics.

   Using AUC and AUC-PR still throws an error.  See #4663 for a robust fix.
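
  A rough illustration of the current behaviour (assumes a build with GPU
  support; the snippet is not part of this change):

    import numpy as np
    import xgboost

    empty = xgboost.DMatrix(np.empty((0, 10)), np.empty(0))
    params = {'tree_method': 'gpu_hist', 'eval_metric': 'rmse'}
    # Elementwise metrics such as rmse now evaluate cleanly on 0 rows...
    xgboost.train(params, empty, num_boost_round=2, evals=[(empty, 'Train')])
    # ...while AUC / AUC-PR on 0 rows still raise; see #4663.
    params['eval_metric'] = 'auc'
    try:
        xgboost.train(params, empty, num_boost_round=2,
                      evals=[(empty, 'Train')])
    except xgboost.core.XGBoostError:
        pass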

* Add tests for edge cases.
* Add a `LaunchKernel` wrapper that handles zero-sized grids.
* Move some parts of allreducer into a cu file.
* Don't validate feature names when the booster is empty.

* Sync the number of columns in DMatrix.

  num_feature is required to be the same across all workers in data-split
  mode.

* Filtering in the dask interface now by default syncs all boosters that are
not empty, instead of using only rank 0.

* Fix Jenkins' GPU tests.

* Install dask-cuda from source in Jenkins' test.

  Now all tests are actually running.

* Restore GPU Hist tree synchronization test.

* Check UUID of running devices.

  The check is only performed on CUDA version >= 10.x, as 9.x doesn't have the
  UUID field.

* Fix CMake policy and project variables.

  Use `xgboost_SOURCE_DIR` uniformly; add a policy for CMake >= 3.13.

* Fix copying data to CPU.

* Fix a race condition in the CPU predictor.

* Fix duplicated DMatrix construction.

* Don't download an extra NCCL in the CI script.
2019-11-06 16:13:13 +08:00


import numpy as np
import sys
import unittest
import pytest
import xgboost

sys.path.append("tests/python")
from regression_test_utilities import run_suite, parameter_combinations, \
    assert_results_non_increasing
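
# run_suite, parameter_combinations and assert_results_non_increasing live in
# tests/python/regression_test_utilities.py, hence the sys.path tweak above.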


def assert_gpu_results(cpu_results, gpu_results):
    for cpu_res, gpu_res in zip(cpu_results, gpu_results):
        # Check final eval result roughly equivalent
        assert np.allclose(cpu_res["eval"][-1],
                           gpu_res["eval"][-1], 1e-2, 1e-2)


datasets = ["Boston", "Cancer", "Digits", "Sparse regression",
            "Sparse regression with weights", "Small weights regression"]


class TestGPU(unittest.TestCase):
    def test_gpu_hist(self):
        test_param = parameter_combinations({'gpu_id': [0],
                                             'max_depth': [2, 8],
                                             'max_leaves': [255, 4],
                                             'max_bin': [2, 256],
                                             'grow_policy': ['lossguide']})
        test_param.append({'single_precision_histogram': True})
        test_param.append({'min_child_weight': 0,
                           'lambda': 0})
        for param in test_param:
            param['tree_method'] = 'gpu_hist'
            gpu_results = run_suite(param, select_datasets=datasets)
            assert_results_non_increasing(gpu_results, 1e-2)
            param['tree_method'] = 'hist'
            cpu_results = run_suite(param, select_datasets=datasets)
            assert_gpu_results(cpu_results, gpu_results)
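
    # Regression test for #4990: training on a zero-row DMatrix must not
    # crash, and predictions from the resulting booster fall back to the
    # default base_score of 0.5.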
    def test_with_empty_dmatrix(self):
        # FIXME(trivialfis): This should be done with all updaters
        kRows = 0
        kCols = 100

        X = np.empty((kRows, kCols))
        y = np.empty((kRows,))

        dtrain = xgboost.DMatrix(X, y)

        bst = xgboost.train({'verbosity': 2,
                             'tree_method': 'gpu_hist',
                             'gpu_id': 0},
                            dtrain,
                            verbose_eval=True,
                            num_boost_round=6,
                            evals=[(dtrain, 'Train')])

        kRows = 100
        X = np.random.randn(kRows, kCols)

        dtest = xgboost.DMatrix(X)
        predictions = bst.predict(dtest)
        np.testing.assert_allclose(predictions, 0.5, 1e-6)
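
    # pytest.mark.mgpu marks tests that need more than one GPU; this one
    # trains on gpu_id 1.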
    @pytest.mark.mgpu
    def test_specified_gpu_id_gpu_update(self):
        variable_param = {'gpu_id': [1],
                          'max_depth': [8],
                          'max_leaves': [255, 4],
                          'max_bin': [2, 64],
                          'grow_policy': ['lossguide'],
                          'tree_method': ['gpu_hist']}
        for param in parameter_combinations(variable_param):
            gpu_results = run_suite(param, select_datasets=datasets)
            assert_results_non_increasing(gpu_results, 1e-2)
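
# Note: the sys.path.append above assumes these tests are launched from the
# repository root, e.g. `pytest tests/python-gpu/test_gpu_updaters.py`.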