xgboost/tests/python/test_monotone_constraints.py
Jiaming Yuan 1f9a57d17b
[Breaking] Require format to be specified in input URI. (#9077)
Previously, we used `libsvm` as the default when the format was not specified. However, the dmlc
data parser is not particularly robust against errors, and the most common type of error
is an undefined format.

Along with this change, we recommend that users use other data loaders instead. We will
continue to maintain the parsers, as they are currently used for many internal tests,
including federated learning.
2023-04-28 19:45:15 +08:00

152 lines
5.2 KiB
Python

import numpy as np
import pytest
import xgboost as xgb
from xgboost import testing as tm
# Relative directory prefix for the demo data files; it is concatenated with
# the agaricus file names in ``test_training_accuracy``, so the path resolves
# against the working directory the tests are launched from.
dpath = 'demo/data/'
def is_increasing(y):
    """Return True when none of the consecutive differences of ``y`` is negative.

    Equal neighbours are accepted, so the check is for a non-decreasing
    sequence. An input with fewer than two elements has no differences and
    therefore passes.
    """
    steps = np.diff(y)
    return not np.any(steps < 0.0)
def is_decreasing(y):
    """Return True when none of the consecutive differences of ``y`` is positive.

    Equal neighbours are accepted, so the check is for a non-increasing
    sequence. An input with fewer than two elements has no differences and
    therefore passes.
    """
    steps = np.diff(y)
    return not np.any(steps > 0.0)
def is_correctly_constrained(learner, feature_names=None):
    """Check the learner's predictions for the (+1, -1) monotone pattern.

    A 100-point grid on [0, 1] is swept through one feature while the other
    feature is held at each grid value in turn. Predictions must not decrease
    while the first feature is swept and must not increase while the second
    feature is swept.

    Parameters
    ----------
    learner :
        Object exposing ``predict(DMatrix)``.
    feature_names :
        Optional feature names forwarded to every probing ``xgb.DMatrix``.

    Returns
    -------
    bool
        False as soon as one sweep violates the expected direction, True if
        every sweep passes.
    """
    grid_size = 100
    sweep = np.linspace(0, 1, grid_size).reshape((grid_size, 1))

    for held_value in np.linspace(0, 1, grid_size):
        held = held_value * np.ones((grid_size, 1))

        # First column varies, second column is held constant.
        rising_preds = learner.predict(
            xgb.DMatrix(np.column_stack((sweep, held)), feature_names=feature_names)
        )
        # Second column varies, first column is held constant.
        falling_preds = learner.predict(
            xgb.DMatrix(np.column_stack((held, sweep)), feature_names=feature_names)
        )

        if not is_increasing(rising_preds) or not is_decreasing(falling_preds):
            return False

    return True
# Synthetic regression data shared by the tests below.  The target rises with
# the first feature (+5 * x1) and falls with the second (-5 * x2); the sine
# and cosine terms add local wiggles so that an unconstrained fit would not be
# monotone on its own.
number_of_dpoints = 1000

# A seeded generator keeps the data set identical between runs, so a failing
# test can be reproduced instead of depending on the global NumPy RNG state.
_rng = np.random.default_rng(2023)

x1_positively_correlated_with_y = _rng.random(size=number_of_dpoints)
x2_negatively_correlated_with_y = _rng.random(size=number_of_dpoints)

# Column 0 is the positively related feature, column 1 the negatively related one.
x = np.column_stack((
    x1_positively_correlated_with_y, x2_negatively_correlated_with_y
))

# Small Gaussian noise added to the target.
zs = _rng.normal(loc=0.0, scale=0.01, size=number_of_dpoints)
y = (
    5 * x1_positively_correlated_with_y +
    np.sin(10 * np.pi * x1_positively_correlated_with_y) -
    5 * x2_negatively_correlated_with_y -
    np.cos(10 * np.pi * x2_negatively_correlated_with_y) +
    zs
)

# Built without feature names; tests that need names construct their own DMatrix.
training_dset = xgb.DMatrix(x, label=y)
class TestMonotoneConstraints:
    """Train boosters with ``monotone_constraints`` and verify the outcome.

    Most tests train on the module-level ``training_dset`` with an increasing
    constraint on the first feature and a decreasing one on the second, then
    probe the booster with ``is_correctly_constrained``.
    """

    def test_monotone_constraints_for_exact_tree_method(self):
        # first check monotonicity for the 'exact' tree method
        booster = xgb.train(
            {
                'tree_method': 'exact',
                'verbosity': 1,
                'monotone_constraints': '(1, -1)',
            },
            training_dset,
        )
        assert is_correctly_constrained(booster)

    @pytest.mark.parametrize(
        "tree_method,policy",
        [
            ("hist", "depthwise"),
            ("approx", "depthwise"),
            ("hist", "lossguide"),
            ("approx", "lossguide"),
        ],
    )
    def test_monotone_constraints(self, tree_method: str, policy: str) -> None:
        # Same check as for 'exact', for each tree method / grow policy pair.
        booster = xgb.train(
            {
                "tree_method": tree_method,
                "grow_policy": policy,
                "monotone_constraints": "(1, -1)",
            },
            training_dset,
        )
        assert is_correctly_constrained(booster)

    def test_monotone_constraints_tuple(self) -> None:
        # The constraints are passed as a Python tuple rather than a string.
        booster = xgb.train({"monotone_constraints": (1, -1)}, training_dset)
        assert is_correctly_constrained(booster)

    @pytest.mark.parametrize('format', [dict, list])
    def test_monotone_constraints_feature_names(self, format):
        # next check monotonicity when initializing monotone_constraints by feature names
        params = {
            'tree_method': 'hist',
            'grow_policy': 'lossguide',
            'monotone_constraints': {'feature_0': 1, 'feature_1': -1},
        }
        # The parameters are exercised both as a dict and as a list of pairs.
        if format is list:
            params = list(params.items())

        # The shared data set was built without feature names.
        with pytest.raises(ValueError):
            xgb.train(params, training_dset)

        # One constrained name ('feature_1') is absent from the data set.
        mismatched_names = ['feature_0', 'feature_2']
        mismatched_dset = xgb.DMatrix(x, label=y, feature_names=mismatched_names)
        with pytest.raises(ValueError):
            xgb.train(params, mismatched_dset)

        # Names agree with the constraint keys: training must succeed and the
        # resulting booster must honour the constraints.
        feature_names = ['feature_0', 'feature_1']
        named_dset = xgb.DMatrix(x, label=y, feature_names=feature_names)
        booster = xgb.train(params, named_dset)
        assert is_correctly_constrained(booster, feature_names)

    @pytest.mark.skipif(**tm.no_sklearn())
    def test_training_accuracy(self):
        from sklearn.metrics import accuracy_score

        dtrain = xgb.DMatrix(dpath + "agaricus.txt.train?indexing_mode=1&format=libsvm")
        dtest = xgb.DMatrix(dpath + "agaricus.txt.test?indexing_mode=1&format=libsvm")
        params = {
            'eta': 1,
            'max_depth': 6,
            'objective': 'binary:logistic',
            'tree_method': 'hist',
            'monotone_constraints': '(1, 0)',
        }
        num_boost_round = 5

        # The same check is run for both grow policies, in this order.
        for grow_policy in ('lossguide', 'depthwise'):
            params['grow_policy'] = grow_policy
            bst = xgb.train(params, dtrain, num_boost_round)
            # The flag is True where the prediction is *below* 0.5, so the
            # score against the labels is required to stay under 0.1.
            # NOTE(review): this reads as an error-rate bound expressed through
            # accuracy_score on inverted predictions -- confirm intent.
            pred_dtest = bst.predict(dtest) < 0.5
            assert accuracy_score(dtest.get_label(), pred_dtest) < 0.1