Enforce correct data shape. (#5191)
* Fix syncing DMatrix columns. * Add notes for tree method. * Enable feature validation for all interfaces except for JVM. * Better tests for boosting from predictions. * Disable validation on JVM.
This commit is contained in:
@@ -91,7 +91,6 @@ TEST(Learner, CheckGroup) {
|
||||
}
|
||||
|
||||
TEST(Learner, SLOW_CheckMultiBatch) {
|
||||
using Arg = std::pair<std::string, std::string>;
|
||||
// Create sufficiently large data to make two row pages
|
||||
dmlc::TemporaryDirectory tempdir;
|
||||
const std::string tmp_file = tempdir.path + "/big.libsvm";
|
||||
@@ -107,7 +106,7 @@ TEST(Learner, SLOW_CheckMultiBatch) {
|
||||
dmat->Info().SetInfo("label", labels.data(), DataType::kFloat32, num_row);
|
||||
std::vector<std::shared_ptr<DMatrix>> mat{dmat};
|
||||
auto learner = std::unique_ptr<Learner>(Learner::Create(mat));
|
||||
learner->SetParams({Arg{"objective", "binary:logistic"}, Arg{"verbosity", "3"}});
|
||||
learner->SetParams(Args{{"objective", "binary:logistic"}});
|
||||
learner->UpdateOneIter(0, dmat.get());
|
||||
}
|
||||
|
||||
|
||||
@@ -6,7 +6,6 @@ import subprocess
|
||||
import os
|
||||
import json
|
||||
import pytest
|
||||
import copy
|
||||
|
||||
import xgboost as xgb
|
||||
from xgboost import XGBClassifier
|
||||
|
||||
@@ -13,6 +13,10 @@ class TestGPUPredict(unittest.TestCase):
|
||||
np.random.seed(1)
|
||||
test_num_rows = [10, 1000, 5000]
|
||||
test_num_cols = [10, 50, 500]
|
||||
# This test passes for tree_method=gpu_hist and tree_method=exact, but
|
||||
# for `hist` and `approx` the floating point error accumulates faster
|
||||
# and fails even when tol is set to 1e-4. For `hist`, the mismatching rate
|
||||
# with 5000 rows is 0.04.
|
||||
for num_rows in test_num_rows:
|
||||
for num_cols in test_num_cols:
|
||||
dtrain = xgb.DMatrix(np.random.randn(num_rows, num_cols),
|
||||
@@ -27,7 +31,7 @@ class TestGPUPredict(unittest.TestCase):
|
||||
"objective": "binary:logistic",
|
||||
"predictor": "gpu_predictor",
|
||||
'eval_metric': 'auc',
|
||||
'verbosity': '3'
|
||||
'tree_method': 'gpu_hist'
|
||||
}
|
||||
bst = xgb.train(param, dtrain, iterations, evals=watchlist,
|
||||
evals_result=res)
|
||||
@@ -43,11 +47,11 @@ class TestGPUPredict(unittest.TestCase):
|
||||
cpu_pred_val = bst_cpu.predict(dval, output_margin=True)
|
||||
|
||||
np.testing.assert_allclose(cpu_pred_train, gpu_pred_train,
|
||||
rtol=1e-3)
|
||||
rtol=1e-6)
|
||||
np.testing.assert_allclose(cpu_pred_val, gpu_pred_val,
|
||||
rtol=1e-3)
|
||||
rtol=1e-6)
|
||||
np.testing.assert_allclose(cpu_pred_test, gpu_pred_test,
|
||||
rtol=1e-3)
|
||||
rtol=1e-6)
|
||||
|
||||
def non_decreasing(self, L):
|
||||
return all((x - y) < 0.001 for x, y in zip(L, L[1:]))
|
||||
|
||||
@@ -2,9 +2,11 @@ import xgboost as xgb
|
||||
import pytest
|
||||
import sys
|
||||
import numpy as np
|
||||
import unittest
|
||||
|
||||
sys.path.append("tests/python")
|
||||
import testing as tm
|
||||
import testing as tm # noqa
|
||||
import test_with_sklearn as twskl # noqa
|
||||
|
||||
pytestmark = pytest.mark.skipif(**tm.no_sklearn())
|
||||
|
||||
@@ -29,3 +31,10 @@ def test_gpu_binary_classification():
|
||||
err = sum(1 for i in range(len(preds))
|
||||
if int(preds[i] > 0.5) != labels[i]) / float(len(preds))
|
||||
assert err < 0.1
|
||||
|
||||
|
||||
class TestGPUBoostFromPrediction(unittest.TestCase):
|
||||
cpu_test = twskl.TestBoostFromPrediction()
|
||||
|
||||
def test_boost_from_prediction_gpu_hist(self):
|
||||
self.cpu_test.run_boost_from_prediction('gpu_hist')
|
||||
|
||||
@@ -5,6 +5,7 @@ import tempfile
|
||||
import os
|
||||
import shutil
|
||||
import pytest
|
||||
import unittest
|
||||
|
||||
rng = np.random.RandomState(1994)
|
||||
|
||||
@@ -697,21 +698,37 @@ def test_XGBClassifier_resume():
|
||||
assert log_loss1 > log_loss2
|
||||
|
||||
|
||||
def test_boost_from_prediction():
|
||||
from sklearn.datasets import load_breast_cancer
|
||||
X, y = load_breast_cancer(return_X_y=True)
|
||||
model_0 = xgb.XGBClassifier(
|
||||
learning_rate=0.3, random_state=0, n_estimators=4)
|
||||
model_0.fit(X=X, y=y)
|
||||
margin = model_0.predict(X, output_margin=True)
|
||||
class TestBoostFromPrediction(unittest.TestCase):
|
||||
def run_boost_from_prediction(self, tree_method):
|
||||
from sklearn.datasets import load_breast_cancer
|
||||
X, y = load_breast_cancer(return_X_y=True)
|
||||
model_0 = xgb.XGBClassifier(
|
||||
learning_rate=0.3, random_state=0, n_estimators=4,
|
||||
tree_method=tree_method)
|
||||
model_0.fit(X=X, y=y)
|
||||
margin = model_0.predict(X, output_margin=True)
|
||||
|
||||
model_1 = xgb.XGBClassifier(
|
||||
learning_rate=0.3, random_state=0, n_estimators=4)
|
||||
model_1.fit(X=X, y=y, base_margin=margin)
|
||||
predictions_1 = model_1.predict(X, base_margin=margin)
|
||||
model_1 = xgb.XGBClassifier(
|
||||
learning_rate=0.3, random_state=0, n_estimators=4,
|
||||
tree_method=tree_method)
|
||||
model_1.fit(X=X, y=y, base_margin=margin)
|
||||
predictions_1 = model_1.predict(X, base_margin=margin)
|
||||
|
||||
cls_2 = xgb.XGBClassifier(
|
||||
learning_rate=0.3, random_state=0, n_estimators=8)
|
||||
cls_2.fit(X=X, y=y)
|
||||
predictions_2 = cls_2.predict(X, base_margin=margin)
|
||||
assert np.all(predictions_1 == predictions_2)
|
||||
cls_2 = xgb.XGBClassifier(
|
||||
learning_rate=0.3, random_state=0, n_estimators=8,
|
||||
tree_method=tree_method)
|
||||
cls_2.fit(X=X, y=y)
|
||||
predictions_2 = cls_2.predict(X)
|
||||
assert np.all(predictions_1 == predictions_2)
|
||||
|
||||
@pytest.mark.skipif(**tm.no_sklearn())
|
||||
def test_boost_from_prediction_hist(self):
|
||||
self.run_boost_from_prediction('hist')
|
||||
|
||||
@pytest.mark.skipif(**tm.no_sklearn())
|
||||
def test_boost_from_prediction_approx(self):
|
||||
self.run_boost_from_prediction('approx')
|
||||
|
||||
@pytest.mark.skipif(**tm.no_sklearn())
|
||||
def test_boost_from_prediction_exact(self):
|
||||
self.run_boost_from_prediction('exact')
|
||||
|
||||
Reference in New Issue
Block a user