Add SHAP interaction effects, fix minor bug, and add cox loss (#3043)
* Add interaction effects and cox loss * Minimize whitespace changes * Cox loss now no longer needs a pre-sorted dataset. * Address code review comments * Remove mem check, rename to pred_interactions, include bias * Make lint happy * More lint fixes * Fix cox loss indexing * Fix main effects and tests * Fix lint * Use half interaction values on the off-diagonals * Fix lint again
This commit is contained in:
committed by
Vadim Khotilovich
parent
077abb35cd
commit
d878c36c84
@@ -2,7 +2,6 @@
|
||||
import numpy as np
|
||||
import xgboost as xgb
|
||||
import unittest
|
||||
import itertools
|
||||
import json
|
||||
|
||||
dpath = 'demo/data/'
|
||||
@@ -143,35 +142,6 @@ class TestBasic(unittest.TestCase):
|
||||
dm = xgb.DMatrix(dummy, feature_names=list('abcde'))
|
||||
self.assertRaises(ValueError, bst.predict, dm)
|
||||
|
||||
def test_feature_importances(self):
|
||||
data = np.random.randn(100, 5)
|
||||
target = np.array([0, 1] * 50)
|
||||
|
||||
features = ['Feature1', 'Feature2', 'Feature3', 'Feature4', 'Feature5']
|
||||
|
||||
dm = xgb.DMatrix(data, label=target,
|
||||
feature_names=features)
|
||||
params = {'objective': 'multi:softprob',
|
||||
'eval_metric': 'mlogloss',
|
||||
'eta': 0.3,
|
||||
'num_class': 3}
|
||||
|
||||
bst = xgb.train(params, dm, num_boost_round=10)
|
||||
|
||||
# number of feature importances should == number of features
|
||||
scores1 = bst.get_score()
|
||||
scores2 = bst.get_score(importance_type='weight')
|
||||
scores3 = bst.get_score(importance_type='cover')
|
||||
scores4 = bst.get_score(importance_type='gain')
|
||||
assert len(scores1) == len(features)
|
||||
assert len(scores2) == len(features)
|
||||
assert len(scores3) == len(features)
|
||||
assert len(scores4) == len(features)
|
||||
|
||||
# check backwards compatibility of get_fscore
|
||||
fscores = bst.get_fscore()
|
||||
assert scores1 == fscores
|
||||
|
||||
def test_dump(self):
|
||||
data = np.random.randn(100, 2)
|
||||
target = np.array([0, 1] * 50)
|
||||
@@ -268,41 +238,3 @@ class TestBasic(unittest.TestCase):
|
||||
cv = xgb.cv(params, dm, num_boost_round=10, shuffle=False, nfold=10, as_pandas=False)
|
||||
assert isinstance(cv, dict)
|
||||
assert len(cv) == (4)
|
||||
|
||||
|
||||
def test_contributions():
|
||||
dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train')
|
||||
dtest = xgb.DMatrix(dpath + 'agaricus.txt.test')
|
||||
|
||||
def test_fn(max_depth, num_rounds):
|
||||
# train
|
||||
params = {'max_depth': max_depth, 'eta': 1, 'silent': 1}
|
||||
bst = xgb.train(params, dtrain, num_boost_round=num_rounds)
|
||||
|
||||
# predict
|
||||
preds = bst.predict(dtest)
|
||||
contribs = bst.predict(dtest, pred_contribs=True)
|
||||
|
||||
# result should be (number of features + BIAS) * number of rows
|
||||
assert contribs.shape == (dtest.num_row(), dtest.num_col() + 1)
|
||||
|
||||
# sum of contributions should be same as predictions
|
||||
np.testing.assert_array_almost_equal(np.sum(contribs, axis=1), preds)
|
||||
|
||||
for max_depth, num_rounds in itertools.product(range(0, 3), range(1, 5)):
|
||||
yield test_fn, max_depth, num_rounds
|
||||
|
||||
# check that we get the right SHAP values for a basic AND example
|
||||
# (https://arxiv.org/abs/1706.06060)
|
||||
X = np.zeros((4, 2))
|
||||
X[0, :] = 1
|
||||
X[1, 0] = 1
|
||||
X[2, 1] = 1
|
||||
y = np.zeros(4)
|
||||
y[0] = 1
|
||||
param = {"max_depth": 2, "base_score": 0.0, "eta": 1.0, "lambda": 0}
|
||||
bst = xgb.train(param, xgb.DMatrix(X, label=y), 1)
|
||||
out = bst.predict(xgb.DMatrix(X[0:1, :]), pred_contribs=True)
|
||||
assert out[0, 0] == 0.375
|
||||
assert out[0, 1] == 0.375
|
||||
assert out[0, 2] == 0.25
|
||||
|
||||
252
tests/python/test_shap.py
Normal file
252
tests/python/test_shap.py
Normal file
@@ -0,0 +1,252 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
import numpy as np
|
||||
import xgboost as xgb
|
||||
import unittest
|
||||
import itertools
|
||||
import re
|
||||
import scipy
|
||||
import scipy.special
|
||||
|
||||
dpath = 'demo/data/'
|
||||
rng = np.random.RandomState(1994)
|
||||
|
||||
|
||||
class TestSHAP(unittest.TestCase):
|
||||
|
||||
def test_feature_importances(self):
|
||||
data = np.random.randn(100, 5)
|
||||
target = np.array([0, 1] * 50)
|
||||
|
||||
features = ['Feature1', 'Feature2', 'Feature3', 'Feature4', 'Feature5']
|
||||
|
||||
dm = xgb.DMatrix(data, label=target,
|
||||
feature_names=features)
|
||||
params = {'objective': 'multi:softprob',
|
||||
'eval_metric': 'mlogloss',
|
||||
'eta': 0.3,
|
||||
'num_class': 3}
|
||||
|
||||
bst = xgb.train(params, dm, num_boost_round=10)
|
||||
|
||||
# number of feature importances should == number of features
|
||||
scores1 = bst.get_score()
|
||||
scores2 = bst.get_score(importance_type='weight')
|
||||
scores3 = bst.get_score(importance_type='cover')
|
||||
scores4 = bst.get_score(importance_type='gain')
|
||||
assert len(scores1) == len(features)
|
||||
assert len(scores2) == len(features)
|
||||
assert len(scores3) == len(features)
|
||||
assert len(scores4) == len(features)
|
||||
|
||||
# check backwards compatibility of get_fscore
|
||||
fscores = bst.get_fscore()
|
||||
assert scores1 == fscores
|
||||
|
||||
dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train')
|
||||
dtest = xgb.DMatrix(dpath + 'agaricus.txt.test')
|
||||
|
||||
def fn(max_depth, num_rounds):
|
||||
# train
|
||||
params = {'max_depth': max_depth, 'eta': 1, 'silent': 1}
|
||||
bst = xgb.train(params, dtrain, num_boost_round=num_rounds)
|
||||
|
||||
# predict
|
||||
preds = bst.predict(dtest)
|
||||
contribs = bst.predict(dtest, pred_contribs=True)
|
||||
|
||||
# result should be (number of features + BIAS) * number of rows
|
||||
assert contribs.shape == (dtest.num_row(), dtest.num_col() + 1)
|
||||
|
||||
# sum of contributions should be same as predictions
|
||||
np.testing.assert_array_almost_equal(np.sum(contribs, axis=1), preds)
|
||||
|
||||
# for max_depth, num_rounds in itertools.product(range(0, 3), range(1, 5)):
|
||||
# yield fn, max_depth, num_rounds
|
||||
|
||||
# check that we get the right SHAP values for a basic AND example
|
||||
# (https://arxiv.org/abs/1706.06060)
|
||||
X = np.zeros((4, 2))
|
||||
X[0, :] = 1
|
||||
X[1, 0] = 1
|
||||
X[2, 1] = 1
|
||||
y = np.zeros(4)
|
||||
y[0] = 1
|
||||
param = {"max_depth": 2, "base_score": 0.0, "eta": 1.0, "lambda": 0}
|
||||
bst = xgb.train(param, xgb.DMatrix(X, label=y), 1)
|
||||
out = bst.predict(xgb.DMatrix(X[0:1, :]), pred_contribs=True)
|
||||
assert out[0, 0] == 0.375
|
||||
assert out[0, 1] == 0.375
|
||||
assert out[0, 2] == 0.25
|
||||
|
||||
def parse_model(model):
|
||||
trees = []
|
||||
r_exp = r"([0-9]+):\[f([0-9]+)<([0-9\.e-]+)\] yes=([0-9]+),no=([0-9]+).*cover=([0-9e\.]+)"
|
||||
r_exp_leaf = r"([0-9]+):leaf=([0-9\.e-]+),cover=([0-9e\.]+)"
|
||||
for tree in model.get_dump(with_stats=True):
|
||||
|
||||
lines = list(tree.splitlines())
|
||||
trees.append([None for i in range(len(lines))])
|
||||
for line in lines:
|
||||
match = re.search(r_exp, line)
|
||||
if match is not None:
|
||||
ind = int(match.group(1))
|
||||
while ind >= len(trees[-1]):
|
||||
trees[-1].append(None)
|
||||
trees[-1][ind] = {
|
||||
"yes_ind": int(match.group(4)),
|
||||
"no_ind": int(match.group(5)),
|
||||
"value": None,
|
||||
"threshold": float(match.group(3)),
|
||||
"feature_index": int(match.group(2)),
|
||||
"cover": float(match.group(6))
|
||||
}
|
||||
else:
|
||||
|
||||
match = re.search(r_exp_leaf, line)
|
||||
ind = int(match.group(1))
|
||||
while ind >= len(trees[-1]):
|
||||
trees[-1].append(None)
|
||||
trees[-1][ind] = {
|
||||
"value": float(match.group(2)),
|
||||
"cover": float(match.group(3))
|
||||
}
|
||||
return trees
|
||||
|
||||
def exp_value_rec(tree, z, x, i=0):
|
||||
if tree[i]["value"] is not None:
|
||||
return tree[i]["value"]
|
||||
else:
|
||||
ind = tree[i]["feature_index"]
|
||||
if z[ind] == 1:
|
||||
if x[ind] < tree[i]["threshold"]:
|
||||
return exp_value_rec(tree, z, x, tree[i]["yes_ind"])
|
||||
else:
|
||||
return exp_value_rec(tree, z, x, tree[i]["no_ind"])
|
||||
else:
|
||||
r_yes = tree[tree[i]["yes_ind"]]["cover"] / tree[i]["cover"]
|
||||
out = exp_value_rec(tree, z, x, tree[i]["yes_ind"])
|
||||
val = out * r_yes
|
||||
|
||||
r_no = tree[tree[i]["no_ind"]]["cover"] / tree[i]["cover"]
|
||||
out = exp_value_rec(tree, z, x, tree[i]["no_ind"])
|
||||
val += out * r_no
|
||||
return val
|
||||
|
||||
def exp_value(trees, z, x):
|
||||
return np.sum([exp_value_rec(tree, z, x) for tree in trees])
|
||||
|
||||
def all_subsets(ss):
|
||||
return itertools.chain(*map(lambda x: itertools.combinations(ss, x), range(0, len(ss) + 1)))
|
||||
|
||||
def shap_value(trees, x, i, cond=None, cond_value=None):
|
||||
M = len(x)
|
||||
z = np.zeros(M)
|
||||
other_inds = list(set(range(M)) - set([i]))
|
||||
if cond is not None:
|
||||
other_inds = list(set(other_inds) - set([cond]))
|
||||
z[cond] = cond_value
|
||||
M -= 1
|
||||
total = 0.0
|
||||
|
||||
for subset in all_subsets(other_inds):
|
||||
if len(subset) > 0:
|
||||
z[list(subset)] = 1
|
||||
v1 = exp_value(trees, z, x)
|
||||
z[i] = 1
|
||||
v2 = exp_value(trees, z, x)
|
||||
total += (v2 - v1) / (scipy.special.binom(M - 1, len(subset)) * M)
|
||||
z[i] = 0
|
||||
z[list(subset)] = 0
|
||||
return total
|
||||
|
||||
def shap_values(trees, x):
|
||||
vals = [shap_value(trees, x, i) for i in range(len(x))]
|
||||
vals.append(exp_value(trees, np.zeros(len(x)), x))
|
||||
return np.array(vals)
|
||||
|
||||
def interaction_values(trees, x):
|
||||
M = len(x)
|
||||
out = np.zeros((M + 1, M + 1))
|
||||
for i in range(len(x)):
|
||||
for j in range(len(x)):
|
||||
if i != j:
|
||||
out[i, j] = interaction_value(trees, x, i, j) / 2
|
||||
svals = shap_values(trees, x)
|
||||
main_effects = svals - out.sum(1)
|
||||
out[np.diag_indices_from(out)] = main_effects
|
||||
return out
|
||||
|
||||
def interaction_value(trees, x, i, j):
|
||||
M = len(x)
|
||||
z = np.zeros(M)
|
||||
other_inds = list(set(range(M)) - set([i, j]))
|
||||
|
||||
total = 0.0
|
||||
for subset in all_subsets(other_inds):
|
||||
if len(subset) > 0:
|
||||
z[list(subset)] = 1
|
||||
v00 = exp_value(trees, z, x)
|
||||
z[i] = 1
|
||||
v10 = exp_value(trees, z, x)
|
||||
z[j] = 1
|
||||
v11 = exp_value(trees, z, x)
|
||||
z[i] = 0
|
||||
v01 = exp_value(trees, z, x)
|
||||
z[j] = 0
|
||||
total += (v11 - v01 - v10 + v00) / (scipy.special.binom(M - 2, len(subset)) * (M - 1))
|
||||
z[list(subset)] = 0
|
||||
return total
|
||||
|
||||
# test a simple and function
|
||||
M = 2
|
||||
N = 4
|
||||
X = np.zeros((N, M))
|
||||
X[0, :] = 1
|
||||
X[1, 0] = 1
|
||||
X[2, 1] = 1
|
||||
y = np.zeros(N)
|
||||
y[0] = 1
|
||||
param = {"max_depth": 2, "base_score": 0.0, "eta": 1.0, "lambda": 0}
|
||||
bst = xgb.train(param, xgb.DMatrix(X, label=y), 1)
|
||||
brute_force = shap_values(parse_model(bst), X[0, :])
|
||||
fast_method = bst.predict(xgb.DMatrix(X[0:1, :]), pred_contribs=True)
|
||||
assert np.linalg.norm(brute_force - fast_method[0, :]) < 1e-4
|
||||
|
||||
brute_force = interaction_values(parse_model(bst), X[0, :])
|
||||
fast_method = bst.predict(xgb.DMatrix(X[0:1, :]), pred_interactions=True)
|
||||
assert np.linalg.norm(brute_force - fast_method[0, :, :]) < 1e-4
|
||||
|
||||
# test a random function
|
||||
np.random.seed(0)
|
||||
M = 2
|
||||
N = 4
|
||||
X = np.random.randn(N, M)
|
||||
y = np.random.randn(N)
|
||||
param = {"max_depth": 2, "base_score": 0.0, "eta": 1.0, "lambda": 0}
|
||||
bst = xgb.train(param, xgb.DMatrix(X, label=y), 1)
|
||||
brute_force = shap_values(parse_model(bst), X[0, :])
|
||||
fast_method = bst.predict(xgb.DMatrix(X[0:1, :]), pred_contribs=True)
|
||||
assert np.linalg.norm(brute_force - fast_method[0, :]) < 1e-4
|
||||
|
||||
brute_force = interaction_values(parse_model(bst), X[0, :])
|
||||
fast_method = bst.predict(xgb.DMatrix(X[0:1, :]), pred_interactions=True)
|
||||
assert np.linalg.norm(brute_force - fast_method[0, :, :]) < 1e-4
|
||||
|
||||
# test another larger more complex random function
|
||||
np.random.seed(0)
|
||||
M = 5
|
||||
N = 100
|
||||
X = np.random.randn(N, M)
|
||||
y = np.random.randn(N)
|
||||
base_score = 1.0
|
||||
param = {"max_depth": 5, "base_score": base_score, "eta": 0.1, "gamma": 2.0}
|
||||
bst = xgb.train(param, xgb.DMatrix(X, label=y), 10)
|
||||
brute_force = shap_values(parse_model(bst), X[0, :])
|
||||
brute_force[-1] += base_score
|
||||
fast_method = bst.predict(xgb.DMatrix(X[0:1, :]), pred_contribs=True)
|
||||
assert np.linalg.norm(brute_force - fast_method[0, :]) < 1e-4
|
||||
|
||||
brute_force = interaction_values(parse_model(bst), X[0, :])
|
||||
brute_force[-1, -1] += base_score
|
||||
fast_method = bst.predict(xgb.DMatrix(X[0:1, :]), pred_interactions=True)
|
||||
assert np.linalg.norm(brute_force - fast_method[0, :, :]) < 1e-4
|
||||
Reference in New Issue
Block a user