Enforce correct data shape. (#5191)

* Fix syncing DMatrix columns. * notes for tree method. * Enable feature validation for all interfaces except for jvm. * Better tests for boosting from predictions. * Disable validation on JVM.
2020-01-13 15:48:17 +08:00
parent 8cbcc53ccb
commit 7b65698187
14 changed files with 108 additions and 60 deletions
--- a/tests/python-gpu/test_gpu_pickling.py
+++ b/tests/python-gpu/test_gpu_pickling.py
@@ -6,7 +6,6 @@ import subprocess
 import os
 import json
 import pytest
-import copy

 import xgboost as xgb
 from xgboost import XGBClassifier
--- a/tests/python-gpu/test_gpu_prediction.py
+++ b/tests/python-gpu/test_gpu_prediction.py
@@ -13,6 +13,10 @@ class TestGPUPredict(unittest.TestCase):
        np.random.seed(1)
        test_num_rows = [10, 1000, 5000]
        test_num_cols = [10, 50, 500]
+        # This test passes for tree_method=gpu_hist and tree_method=exact, but
+        # for `hist` and `approx` the floating point error accumulates faster
+        # and it fails even when tol is set to 1e-4.  For `hist`, the
+        # mismatching rate with 5000 rows is 0.04.
        for num_rows in test_num_rows:
            for num_cols in test_num_cols:
                dtrain = xgb.DMatrix(np.random.randn(num_rows, num_cols),
@@ -27,7 +31,7 @@ class TestGPUPredict(unittest.TestCase):
                    "objective": "binary:logistic",
                    "predictor": "gpu_predictor",
                    'eval_metric': 'auc',
-                    'verbosity': '3'
+                    'tree_method': 'gpu_hist'
                }
                bst = xgb.train(param, dtrain, iterations, evals=watchlist,
                                evals_result=res)
@@ -43,11 +47,11 @@ class TestGPUPredict(unittest.TestCase):
                cpu_pred_val = bst_cpu.predict(dval, output_margin=True)

                np.testing.assert_allclose(cpu_pred_train, gpu_pred_train,
-                                           rtol=1e-3)
+                                           rtol=1e-6)
                np.testing.assert_allclose(cpu_pred_val, gpu_pred_val,
-                                           rtol=1e-3)
+                                           rtol=1e-6)
                np.testing.assert_allclose(cpu_pred_test, gpu_pred_test,
-                                           rtol=1e-3)
+                                           rtol=1e-6)

    def non_decreasing(self, L):
        return all((x - y) < 0.001 for x, y in zip(L, L[1:]))
--- a/tests/python-gpu/test_gpu_with_sklearn.py
+++ b/tests/python-gpu/test_gpu_with_sklearn.py
@@ -2,9 +2,11 @@ import xgboost as xgb
 import pytest
 import sys
 import numpy as np
+import unittest

 sys.path.append("tests/python")
-import testing as tm
+import testing as tm               # noqa
+import test_with_sklearn as twskl  # noqa

 pytestmark = pytest.mark.skipif(**tm.no_sklearn())

@@ -29,3 +31,10 @@ def test_gpu_binary_classification():
            err = sum(1 for i in range(len(preds))
                      if int(preds[i] > 0.5) != labels[i]) / float(len(preds))
            assert err < 0.1
+
+
+class TestGPUBoostFromPrediction(unittest.TestCase):
+    cpu_test = twskl.TestBoostFromPrediction()
+
+    def test_boost_from_prediction_gpu_hist(self):
+        self.cpu_test.run_boost_from_prediction('gpu_hist')