Export Python Interface for external memory. (#7070)

* Add Python iterator interface.
* Add tests.
* Add demo.
* Add documents.
* Handle empty dataset.
This commit is contained in:
Jiaming Yuan
2021-07-22 15:15:53 +08:00
committed by GitHub
parent e64ee6592f
commit e6088366df
34 changed files with 961 additions and 200 deletions

View File

@@ -0,0 +1,32 @@
import numpy as np
import xgboost as xgb
from hypothesis import given, strategies, settings
import pytest
import sys
sys.path.append("tests/python")
from test_data_iterator import SingleBatch, make_batches
from test_data_iterator import test_single_batch as cpu_single_batch
from test_data_iterator import run_data_iterator
from testing import IteratorForTest, no_cupy
def test_gpu_single_batch() -> None:
cpu_single_batch("gpu_hist")
@pytest.mark.skipif(**no_cupy())
@given(
strategies.integers(0, 1024), strategies.integers(1, 7), strategies.integers(0, 13)
)
@settings(deadline=None)
def test_gpu_data_iterator(
n_samples_per_batch: int, n_features: int, n_batches: int
) -> None:
run_data_iterator(n_samples_per_batch, n_features, n_batches, "gpu_hist", True)
run_data_iterator(n_samples_per_batch, n_features, n_batches, "gpu_hist", False)
def test_cpu_data_iterator() -> None:
"""Make sure CPU algorithm can handle GPU inputs"""
run_data_iterator(1024, 2, 3, "approx", True)

View File

@@ -9,7 +9,7 @@ import test_demos as td # noqa
@pytest.mark.skipif(**tm.no_cupy())
def test_data_iterator():
script = os.path.join(td.PYTHON_DEMO_DIR, 'data_iterator.py')
script = os.path.join(td.PYTHON_DEMO_DIR, 'quantile_data_iterator.py')
cmd = ['python', script]
subprocess.check_call(cmd)

View File

@@ -112,7 +112,6 @@ class TestGPUUpdaters:
tm.dataset_strategy)
@settings(deadline=None)
def test_external_memory(self, param, num_rounds, dataset):
pytest.xfail(reason='TestGPUUpdaters::test_external_memory is flaky')
# We cannot handle empty dataset yet
assume(len(dataset.y) > 0)
param['tree_method'] = 'gpu_hist'