Speed up python test (#5752)
* Speed up tests
* Prevent DeviceQuantileDMatrix initialisation with numpy
* Use joblib.Memory
* Use RandomState
This commit is contained in:
parent
cfc23c6a6b
commit
359023c0fa
@@ -566,10 +566,6 @@ class DeviceQuantileCudaArrayInterfaceHandler(
|
|||||||
|
|
||||||
__device_quantile_dmatrix_registry.register_handler(
|
__device_quantile_dmatrix_registry.register_handler(
|
||||||
'cupy.core.core', 'ndarray', DeviceQuantileCudaArrayInterfaceHandler)
|
'cupy.core.core', 'ndarray', DeviceQuantileCudaArrayInterfaceHandler)
|
||||||
__device_quantile_dmatrix_registry.register_handler_opaque(
|
|
||||||
lambda x: hasattr(x, '__array__'), NumpyHandler)
|
|
||||||
__device_quantile_dmatrix_registry.register_handler_opaque(
|
|
||||||
lambda x: hasattr(x, '__cuda_array_interface__'), NumpyHandler)
|
|
||||||
|
|
||||||
|
|
||||||
class DeviceQuantileCudaColumnarHandler(DeviceQuantileDMatrixDataHandler,
|
class DeviceQuantileCudaColumnarHandler(DeviceQuantileDMatrixDataHandler,
|
||||||
|
|||||||
22
tests/python-gpu/test_device_quantile_dmatrix.py
Normal file
22
tests/python-gpu/test_device_quantile_dmatrix.py
Normal file
@@ -0,0 +1,22 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
import numpy as np
|
||||||
|
import xgboost as xgb
|
||||||
|
import unittest
|
||||||
|
import pytest
|
||||||
|
import sys
|
||||||
|
|
||||||
|
sys.path.append("tests/python")
|
||||||
|
import testing as tm
|
||||||
|
|
||||||
|
|
||||||
|
class TestDeviceQuantileDMatrix(unittest.TestCase):
|
||||||
|
def test_dmatrix_numpy_init(self):
|
||||||
|
data = np.random.randn(5, 5)
|
||||||
|
with pytest.raises(AssertionError, match='is not supported for DeviceQuantileDMatrix'):
|
||||||
|
dm = xgb.DeviceQuantileDMatrix(data, np.ones(5, dtype=np.float64))
|
||||||
|
|
||||||
|
@pytest.mark.skipif(**tm.no_cupy())
|
||||||
|
def test_dmatrix_cupy_init(self):
|
||||||
|
import cupy as cp
|
||||||
|
data = cp.random.randn(5, 5)
|
||||||
|
dm = xgb.DeviceQuantileDMatrix(data, cp.ones(5, dtype=np.float64))
|
||||||
@@ -3,12 +3,11 @@ import pytest
|
|||||||
import unittest
|
import unittest
|
||||||
|
|
||||||
sys.path.append('tests/python/')
|
sys.path.append('tests/python/')
|
||||||
import test_linear # noqa: E402
|
import test_linear # noqa: E402
|
||||||
import testing as tm # noqa: E402
|
import testing as tm # noqa: E402
|
||||||
|
|
||||||
|
|
||||||
class TestGPULinear(unittest.TestCase):
|
class TestGPULinear(unittest.TestCase):
|
||||||
|
|
||||||
datasets = ["Boston", "Digits", "Cancer", "Sparse regression"]
|
datasets = ["Boston", "Digits", "Cancer", "Sparse regression"]
|
||||||
common_param = {
|
common_param = {
|
||||||
'booster': ['gblinear'],
|
'booster': ['gblinear'],
|
||||||
@@ -16,7 +15,7 @@ class TestGPULinear(unittest.TestCase):
|
|||||||
'eta': [0.5],
|
'eta': [0.5],
|
||||||
'top_k': [10],
|
'top_k': [10],
|
||||||
'tolerance': [1e-5],
|
'tolerance': [1e-5],
|
||||||
'alpha': [.005, .1],
|
'alpha': [.1],
|
||||||
'lambda': [0.005],
|
'lambda': [0.005],
|
||||||
'coordinate_selection': ['cyclic', 'random', 'greedy']}
|
'coordinate_selection': ['cyclic', 'random', 'greedy']}
|
||||||
|
|
||||||
@@ -26,6 +25,6 @@ class TestGPULinear(unittest.TestCase):
|
|||||||
parameters['gpu_id'] = [0]
|
parameters['gpu_id'] = [0]
|
||||||
for param in test_linear.parameter_combinations(parameters):
|
for param in test_linear.parameter_combinations(parameters):
|
||||||
results = test_linear.run_suite(
|
results = test_linear.run_suite(
|
||||||
param, 150, self.datasets, scale_features=True)
|
param, 100, self.datasets, scale_features=True)
|
||||||
test_linear.assert_regression_result(results, 1e-2)
|
test_linear.assert_regression_result(results, 1e-2)
|
||||||
test_linear.assert_classification_result(results)
|
test_linear.assert_classification_result(results)
|
||||||
|
|||||||
@@ -47,6 +47,7 @@ class TestGPU(unittest.TestCase):
|
|||||||
device_dmatrix_datasets = ["Boston", "Cancer", "Digits"]
|
device_dmatrix_datasets = ["Boston", "Cancer", "Digits"]
|
||||||
for param in test_param:
|
for param in test_param:
|
||||||
param['tree_method'] = 'gpu_hist'
|
param['tree_method'] = 'gpu_hist'
|
||||||
|
|
||||||
gpu_results_device_dmatrix = run_suite(param, select_datasets=device_dmatrix_datasets,
|
gpu_results_device_dmatrix = run_suite(param, select_datasets=device_dmatrix_datasets,
|
||||||
DMatrixT=xgb.DeviceQuantileDMatrix,
|
DMatrixT=xgb.DeviceQuantileDMatrix,
|
||||||
dmatrix_params={'max_bin': param['max_bin']})
|
dmatrix_params={'max_bin': param['max_bin']})
|
||||||
|
|||||||
@@ -4,6 +4,8 @@ import numpy as np
|
|||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
import xgboost as xgb
|
import xgboost as xgb
|
||||||
|
from joblib import Memory
|
||||||
|
memory = Memory('./cachedir', verbose=0)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from sklearn import datasets
|
from sklearn import datasets
|
||||||
@@ -39,27 +41,35 @@ class Dataset:
|
|||||||
return self.__str__()
|
return self.__str__()
|
||||||
|
|
||||||
|
|
||||||
|
@memory.cache
|
||||||
def get_boston():
|
def get_boston():
|
||||||
data = datasets.load_boston()
|
data = datasets.load_boston()
|
||||||
return data.data, data.target
|
return data.data, data.target
|
||||||
|
|
||||||
|
|
||||||
|
@memory.cache
|
||||||
def get_digits():
|
def get_digits():
|
||||||
data = datasets.load_digits()
|
data = datasets.load_digits()
|
||||||
return data.data, data.target
|
return data.data, data.target
|
||||||
|
|
||||||
|
|
||||||
|
@memory.cache
|
||||||
def get_cancer():
|
def get_cancer():
|
||||||
data = datasets.load_breast_cancer()
|
data = datasets.load_breast_cancer()
|
||||||
return data.data, data.target
|
return data.data, data.target
|
||||||
|
|
||||||
|
|
||||||
|
@memory.cache
|
||||||
def get_sparse():
|
def get_sparse():
|
||||||
rng = np.random.RandomState(199)
|
rng = np.random.RandomState(199)
|
||||||
n = 5000
|
n = 2000
|
||||||
sparsity = 0.75
|
sparsity = 0.75
|
||||||
X, y = datasets.make_regression(n, random_state=rng)
|
X, y = datasets.make_regression(n, random_state=rng)
|
||||||
X = np.array([[0.0 if rng.uniform(0, 1) < sparsity else x for x in x_row] for x_row in X])
|
flag = rng.binomial(1, sparsity, X.shape)
|
||||||
|
for i in range(X.shape[0]):
|
||||||
|
for j in range(X.shape[1]):
|
||||||
|
if flag[i, j]:
|
||||||
|
X[i, j] = 0.0
|
||||||
from scipy import sparse
|
from scipy import sparse
|
||||||
X = sparse.csr_matrix(X)
|
X = sparse.csr_matrix(X)
|
||||||
return X, y
|
return X, y
|
||||||
@@ -73,14 +83,18 @@ def get_small_weights():
|
|||||||
return get_weights_regression(1e-6, 1e-5)
|
return get_weights_regression(1e-6, 1e-5)
|
||||||
|
|
||||||
|
|
||||||
|
@memory.cache
|
||||||
def get_weights_regression(min_weight, max_weight):
|
def get_weights_regression(min_weight, max_weight):
|
||||||
rng = np.random.RandomState(199)
|
rng = np.random.RandomState(199)
|
||||||
n = 10000
|
n = 2000
|
||||||
sparsity = 0.25
|
sparsity = 0.25
|
||||||
X, y = datasets.make_regression(n, random_state=rng)
|
X, y = datasets.make_regression(n, random_state=rng)
|
||||||
X = np.array([[np.nan if rng.uniform(0, 1) < sparsity else x
|
flag = rng.binomial(1, sparsity, X.shape)
|
||||||
for x in x_row] for x_row in X])
|
for i in range(X.shape[0]):
|
||||||
w = np.array([rng.uniform(min_weight, max_weight) for i in range(n)])
|
for j in range(X.shape[1]):
|
||||||
|
if flag[i, j]:
|
||||||
|
X[i, j] = np.nan
|
||||||
|
w = rng.uniform(min_weight, max_weight, n)
|
||||||
return X, y, w
|
return X, y, w
|
||||||
|
|
||||||
|
|
||||||
@@ -101,10 +115,12 @@ def train_dataset(dataset, param_in, num_rounds=10, scale_features=False, DMatri
|
|||||||
np.savetxt('tmptmp_1234.csv', np.hstack((dataset.y.reshape(len(dataset.y), 1), X)),
|
np.savetxt('tmptmp_1234.csv', np.hstack((dataset.y.reshape(len(dataset.y), 1), X)),
|
||||||
delimiter=',')
|
delimiter=',')
|
||||||
dtrain = DMatrixT('tmptmp_1234.csv?format=csv&label_column=0#tmptmp_',
|
dtrain = DMatrixT('tmptmp_1234.csv?format=csv&label_column=0#tmptmp_',
|
||||||
weight=dataset.w)
|
weight=dataset.w)
|
||||||
elif DMatrixT is xgb.DeviceQuantileDMatrix:
|
elif DMatrixT is xgb.DeviceQuantileDMatrix:
|
||||||
import cupy as cp
|
import cupy as cp
|
||||||
dtrain = DMatrixT(cp.array(X), dataset.y, weight=dataset.w, **dmatrix_params)
|
dtrain = DMatrixT(cp.array(X), cp.array(dataset.y),
|
||||||
|
weight=None if dataset.w is None else cp.array(dataset.w),
|
||||||
|
**dmatrix_params)
|
||||||
else:
|
else:
|
||||||
dtrain = DMatrixT(X, dataset.y, weight=dataset.w, **dmatrix_params)
|
dtrain = DMatrixT(X, dataset.y, weight=dataset.w, **dmatrix_params)
|
||||||
|
|
||||||
@@ -146,7 +162,8 @@ def parameter_combinations(variable_param):
|
|||||||
def run_suite(param, num_rounds=10, select_datasets=None, scale_features=False,
|
def run_suite(param, num_rounds=10, select_datasets=None, scale_features=False,
|
||||||
DMatrixT=xgb.DMatrix, dmatrix_params={}):
|
DMatrixT=xgb.DMatrix, dmatrix_params={}):
|
||||||
"""
|
"""
|
||||||
Run the given parameters on a range of datasets. Objective and eval metric will be automatically set
|
Run the given parameters on a range of datasets. Objective and eval metric will be
|
||||||
|
automatically set
|
||||||
"""
|
"""
|
||||||
datasets = [
|
datasets = [
|
||||||
Dataset("Boston", get_boston, "reg:squarederror", "rmse"),
|
Dataset("Boston", get_boston, "reg:squarederror", "rmse"),
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user