Speed up python test (#5752)

* Speed up tests

* Prevent DeviceQuantileDMatrix initialisation with numpy

* Use joblib.memory

* Use RandomState
Rory Mitchell 2020-06-05 11:39:24 +12:00 committed by GitHub
parent cfc23c6a6b
commit 359023c0fa
5 changed files with 53 additions and 18 deletions

View File

@ -566,10 +566,6 @@ class DeviceQuantileCudaArrayInterfaceHandler(
__device_quantile_dmatrix_registry.register_handler(
    'cupy.core.core', 'ndarray', DeviceQuantileCudaArrayInterfaceHandler)
__device_quantile_dmatrix_registry.register_handler_opaque(
    lambda x: hasattr(x, '__array__'), NumpyHandler)
__device_quantile_dmatrix_registry.register_handler_opaque(
    lambda x: hasattr(x, '__cuda_array_interface__'), NumpyHandler)
class DeviceQuantileCudaColumnarHandler(DeviceQuantileDMatrixDataHandler,
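For context on the removal above: the registry maps an input to a data handler by its (module, class name), with "opaque" handlers matched by a predicate, and an input that matches nothing trips an assertion. With the `__array__` registrations gone, building a DeviceQuantileDMatrix from a host numpy array now fails fast, which the new test below exercises. A toy sketch of that dispatch pattern (class and method names here are illustrative, not xgboost's real internals):

```python
class _HandlerRegistry:
    """Toy sketch of a data-handler registry keyed by (module, class name),
    with predicate-based 'opaque' fallbacks."""

    def __init__(self, name):
        self.name = name
        self.exact = {}    # (module, class name) -> handler class
        self.opaque = []   # (predicate, handler class)

    def register_handler(self, module, cls, handler):
        self.exact[(module, cls)] = handler

    def register_handler_opaque(self, predicate, handler):
        self.opaque.append((predicate, handler))

    def get_handler(self, data):
        key = (type(data).__module__, type(data).__name__)
        if key in self.exact:
            return self.exact[key]
        for predicate, handler in self.opaque:
            if predicate(data):
                return handler
        # Nothing matched: with the numpy registrations removed, host
        # arrays end up here when used for DeviceQuantileDMatrix.
        assert False, '{} is not supported for {}'.format(type(data), self.name)
```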

View File

@ -0,0 +1,22 @@
# -*- coding: utf-8 -*-
import numpy as np
import xgboost as xgb
import unittest
import pytest
import sys

sys.path.append("tests/python")
import testing as tm


class TestDeviceQuantileDMatrix(unittest.TestCase):
    def test_dmatrix_numpy_init(self):
        data = np.random.randn(5, 5)
        with pytest.raises(AssertionError, match='is not supported for DeviceQuantileDMatrix'):
            dm = xgb.DeviceQuantileDMatrix(data, np.ones(5, dtype=np.float64))

    @pytest.mark.skipif(**tm.no_cupy())
    def test_dmatrix_cupy_init(self):
        import cupy as cp
        data = cp.random.randn(5, 5)
        dm = xgb.DeviceQuantileDMatrix(data, cp.ones(5, dtype=np.float64))

View File

@ -3,12 +3,11 @@ import pytest
import unittest
sys.path.append('tests/python/')
import test_linear # noqa: E402
import testing as tm # noqa: E402
class TestGPULinear(unittest.TestCase):
datasets = ["Boston", "Digits", "Cancer", "Sparse regression"]
common_param = {
'booster': ['gblinear'],
@ -16,7 +15,7 @@ class TestGPULinear(unittest.TestCase):
        'eta': [0.5],
        'top_k': [10],
        'tolerance': [1e-5],
        'alpha': [.005, .1],
        'alpha': [.1],
        'lambda': [0.005],
        'coordinate_selection': ['cyclic', 'random', 'greedy']}
@ -26,6 +25,6 @@ class TestGPULinear(unittest.TestCase):
        parameters['gpu_id'] = [0]
        for param in test_linear.parameter_combinations(parameters):
            results = test_linear.run_suite(
                param, 150, self.datasets, scale_features=True)
                param, 100, self.datasets, scale_features=True)
            test_linear.assert_regression_result(results, 1e-2)
            test_linear.assert_classification_result(results)
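The speed-up in this file comes from a smaller parameter grid (one 'alpha' value instead of two) and fewer boosting rounds (100 instead of 150). parameter_combinations is defined elsewhere and not shown in this diff; a plausible sketch, assuming plain cartesian-product semantics:

```python
import itertools


def parameter_combinations(variable_param):
    """Expand {'name': [v1, v2, ...]} into one dict per point of the
    cartesian product (illustrative sketch, not the project's code)."""
    names = sorted(variable_param)
    combos = itertools.product(*(variable_param[name] for name in names))
    return [dict(zip(names, values)) for values in combos]


# With 'alpha': [.005, .1] the grid above expands to 2 * 3 = 6 combinations
# (two alphas times three coordinate_selection values); 'alpha': [.1]
# halves that to 3.
```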

View File

@ -47,6 +47,7 @@ class TestGPU(unittest.TestCase):
        device_dmatrix_datasets = ["Boston", "Cancer", "Digits"]
        for param in test_param:
            param['tree_method'] = 'gpu_hist'
            gpu_results_device_dmatrix = run_suite(param, select_datasets=device_dmatrix_datasets,
                                                   DMatrixT=xgb.DeviceQuantileDMatrix,
                                                   dmatrix_params={'max_bin': param['max_bin']})

View File

@ -4,6 +4,8 @@ import numpy as np
import os
import sys
import xgboost as xgb
from joblib import Memory
memory = Memory('./cachedir', verbose=0)
try:
    from sklearn import datasets
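joblib's Memory turns the dataset builders below into disk-cached functions: the first call pickles the return value under ./cachedir, and later calls with the same arguments read it back instead of regenerating the data. A minimal self-contained sketch of the same pattern (function and values are illustrative):

```python
import numpy as np
from joblib import Memory

memory = Memory('./cachedir', verbose=0)  # results are pickled under ./cachedir


@memory.cache
def expensive_dataset(n=2000, seed=199):
    # Computed once per (n, seed); subsequent calls load the stored result.
    rng = np.random.RandomState(seed)
    X = rng.randn(n, 20)
    y = X.sum(axis=1)
    return X, y


X, y = expensive_dataset()    # computed and written to the cache
X2, y2 = expensive_dataset()  # served from the cache on the next run
```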
@ -39,27 +41,35 @@ class Dataset:
        return self.__str__()

@memory.cache
def get_boston():
    data = datasets.load_boston()
    return data.data, data.target

@memory.cache
def get_digits():
    data = datasets.load_digits()
    return data.data, data.target

@memory.cache
def get_cancer():
    data = datasets.load_breast_cancer()
    return data.data, data.target

@memory.cache
def get_sparse():
    rng = np.random.RandomState(199)
    n = 5000
    n = 2000
    sparsity = 0.75
    X, y = datasets.make_regression(n, random_state=rng)
    X = np.array([[0.0 if rng.uniform(0, 1) < sparsity else x for x in x_row] for x_row in X])
    flag = rng.binomial(1, sparsity, X.shape)
    for i in range(X.shape[0]):
        for j in range(X.shape[1]):
            if flag[i, j]:
                X[i, j] = 0.0
    from scipy import sparse
    X = sparse.csr_matrix(X)
    return X, y
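The old per-element rng.uniform comprehension was the slow part of get_sparse; RandomState.binomial draws the whole sparsity mask in one vectorized call. A small sketch of the same idea using boolean-mask assignment instead of the explicit loop (synthetic data, not the make_regression output):

```python
import numpy as np

rng = np.random.RandomState(199)
X = rng.randn(2000, 100)
sparsity = 0.75

# One vectorized draw for every element, instead of one Python-level
# rng.uniform() call per element.
flag = rng.binomial(1, sparsity, X.shape).astype(bool)
X[flag] = 0.0  # same effect as the nested loop in the hunk above

print(flag.mean())  # roughly `sparsity` of the entries are now zero
```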
@ -73,14 +83,18 @@ def get_small_weights():
    return get_weights_regression(1e-6, 1e-5)

@memory.cache
def get_weights_regression(min_weight, max_weight):
    rng = np.random.RandomState(199)
    n = 10000
    n = 2000
    sparsity = 0.25
    X, y = datasets.make_regression(n, random_state=rng)
    X = np.array([[np.nan if rng.uniform(0, 1) < sparsity else x
                   for x in x_row] for x_row in X])
    w = np.array([rng.uniform(min_weight, max_weight) for i in range(n)])
    flag = rng.binomial(1, sparsity, X.shape)
    for i in range(X.shape[0]):
        for j in range(X.shape[1]):
            if flag[i, j]:
                X[i, j] = np.nan
    w = rng.uniform(min_weight, max_weight, n)
    return X, y, w
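The weight vector gets the same treatment: rng.uniform accepts a size argument, so a single call replaces the n-element list comprehension, and seeding with RandomState(199) keeps the cached datasets reproducible between runs. A quick illustration of that reproducibility (values are arbitrary):

```python
import numpy as np

# One vectorized call returns all n weights at once.
rng = np.random.RandomState(199)
w = rng.uniform(1e-6, 1e-5, 2000)

# Re-seeding reproduces exactly the same draws, which is what keeps the
# joblib-cached datasets consistent from one test run to the next.
rng2 = np.random.RandomState(199)
assert np.array_equal(w, rng2.uniform(1e-6, 1e-5, 2000))
```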
@ -101,10 +115,12 @@ def train_dataset(dataset, param_in, num_rounds=10, scale_features=False, DMatri
        np.savetxt('tmptmp_1234.csv', np.hstack((dataset.y.reshape(len(dataset.y), 1), X)),
                   delimiter=',')
        dtrain = DMatrixT('tmptmp_1234.csv?format=csv&label_column=0#tmptmp_',
                          weight=dataset.w)
    elif DMatrixT is xgb.DeviceQuantileDMatrix:
        import cupy as cp
        dtrain = DMatrixT(cp.array(X), dataset.y, weight=dataset.w, **dmatrix_params)
        dtrain = DMatrixT(cp.array(X), cp.array(dataset.y),
                          weight=None if dataset.w is None else cp.array(dataset.w),
                          **dmatrix_params)
    else:
        dtrain = DMatrixT(X, dataset.y, weight=dataset.w, **dmatrix_params)
@ -146,7 +162,8 @@ def parameter_combinations(variable_param):
def run_suite(param, num_rounds=10, select_datasets=None, scale_features=False,
              DMatrixT=xgb.DMatrix, dmatrix_params={}):
    """
    Run the given parameters on a range of datasets. Objective and eval metric will be automatically set
    Run the given parameters on a range of datasets. Objective and eval metric will be
    automatically set
    """
    datasets = [
        Dataset("Boston", get_boston, "reg:squarederror", "rmse"),