Deterministic data partitioning for external memory (#6317)

* Make external memory data partitioning deterministic.

* Change the meaning of `page_size` from bytes to number of rows.

* Design a data pool.

* Add a note for external memory.

* Enable unity build on Windows CI.

* Force garbage collection in the test.
This commit is contained in:
Jiaming Yuan
2020-11-11 06:11:06 +08:00
committed by GitHub
parent 9564886d9f
commit 43efadea2e
15 changed files with 334 additions and 88 deletions

View File

@@ -1,5 +1,6 @@
import numpy as np
import sys
import gc
import pytest
import xgboost as xgb
from hypothesis import given, strategies, assume, settings, note
@@ -118,7 +119,10 @@ class TestGPUUpdaters:
assume(len(dataset.y) > 0)
param['tree_method'] = 'gpu_hist'
param = dataset.set_params(param)
external_result = train_result(param, dataset.get_external_dmat(), num_rounds)
m = dataset.get_external_dmat()
external_result = train_result(param, m, num_rounds)
del m
gc.collect()
assert tm.non_increasing(external_result['train'][dataset.metric])
def test_empty_dmatrix_prediction(self):