Implement GK sketching on GPU. (#5846)

* Implement GK sketching on GPU.
* Strong tests on quantile building.
* Handle sparse dataset by binary searching the column index.
* Hypothesis test on dask.
This commit is contained in:
Jiaming Yuan
2020-07-07 12:16:21 +08:00
committed by GitHub
parent ac3f0e78dc
commit 048d969be4
25 changed files with 2045 additions and 405 deletions

View File

@@ -1,10 +1,12 @@
# coding: utf-8
import os
from xgboost.compat import SKLEARN_INSTALLED, PANDAS_INSTALLED
from xgboost.compat import DASK_INSTALLED
from hypothesis import strategies
from hypothesis.extra.numpy import arrays
from joblib import Memory
from sklearn import datasets
import tempfile
import xgboost as xgb
import numpy as np
@@ -123,10 +125,15 @@ class TestDataset:
return xgb.DeviceQuantileDMatrix(X, y, w)
def get_external_dmat(self):
np.savetxt('tmptmp_1234.csv', np.hstack((self.y.reshape(len(self.y), 1), self.X)),
delimiter=',')
return xgb.DMatrix('tmptmp_1234.csv?format=csv&label_column=0#tmptmp_',
weight=self.w)
with tempfile.TemporaryDirectory() as tmpdir:
path = os.path.join(tmpdir, 'tmptmp_1234.csv')
np.savetxt(path,
np.hstack((self.y.reshape(len(self.y), 1), self.X)),
delimiter=',')
uri = path + '?format=csv&label_column=0#tmptmp_'
# The uri looks like:
# 'tmptmp_1234.csv?format=csv&label_column=0#tmptmp_'
return xgb.DMatrix(uri, weight=self.w)
def __repr__(self):
return self.name
@@ -181,6 +188,7 @@ def _dataset_and_weight(draw):
data.w = draw(arrays(np.float64, (len(data.y)), elements=strategies.floats(0.1, 2.0)))
return data
# A strategy for drawing from a set of example datasets
# May add random weights to the dataset
dataset_strategy = _dataset_and_weight()