SHAP values for feature contributions (#2438)

* SHAP values for feature contributions

* Fix commenting error

* New polynomial time SHAP value estimation algorithm

* Update API to support SHAP values

* Fix merge conflicts with updates in master

* Correct submodule hashes

* Fix variable sized stack allocation

* Make lint happy

* Add docs

* Fix typo

* Adjust tolerances

* Remove unneeded def

* Fixed cpp test setup

* Updated R API and cleaned up

* Fixed test typo
This commit is contained in:
Scott Lundberg
2017-10-12 12:35:51 -07:00
committed by GitHub
parent ff9180cd73
commit 78c4188cec
16 changed files with 369 additions and 143 deletions

View File

@@ -12,6 +12,7 @@ TEST(cpu_predictor, Test) {
trees.push_back(std::unique_ptr<RegTree>(new RegTree));
trees.back()->InitModel();
(*trees.back())[0].set_leaf(1.5f);
(*trees.back()).stat(0).sum_hess = 1.0f;
gbm::GBTreeModel model(0.5);
model.CommitModel(std::move(trees), 0);
model.param.num_output_group = 1;
@@ -50,5 +51,11 @@ TEST(cpu_predictor, Test) {
for (int i = 0; i < out_contribution.size(); i++) {
ASSERT_EQ(out_contribution[i], 1.5);
}
// Test predict contribution (approximate method)
cpu_predictor->PredictContribution(dmat.get(), &out_contribution, model, true);
for (int i = 0; i < out_contribution.size(); i++) {
ASSERT_EQ(out_contribution[i], 1.5);
}
}
} // namespace xgboost
} // namespace xgboost

View File

@@ -19,6 +19,7 @@ TEST(gpu_predictor, Test) {
trees.push_back(std::unique_ptr<RegTree>());
trees.back()->InitModel();
(*trees.back())[0].set_leaf(1.5f);
(*trees.back()).stat(0).sum_hess = 1.0f;
gbm::GBTreeModel model(0.5);
model.CommitModel(std::move(trees), 0);
model.param.num_output_group = 1;

View File

@@ -291,3 +291,18 @@ def test_contributions():
for max_depth, num_rounds in itertools.product(range(0, 3), range(1, 5)):
yield test_fn, max_depth, num_rounds
# check that we get the right SHAP values for a basic AND example
# (https://arxiv.org/abs/1706.06060)
X = np.zeros((4, 2))
X[0, :] = 1
X[1, 0] = 1
X[2, 1] = 1
y = np.zeros(4)
y[0] = 1
param = {"max_depth": 2, "base_score": 0.0, "eta": 1.0, "lambda": 0}
bst = xgb.train(param, xgb.DMatrix(X, label=y), 1)
out = bst.predict(xgb.DMatrix(X[0:1, :]), pred_contribs=True)
assert out[0, 0] == 0.375
assert out[0, 1] == 0.375
assert out[0, 2] == 0.25