SHAP values for feature contributions (#2438)

* SHAP values for feature contributions

* Fix commenting error

* New polynomial time SHAP value estimation algorithm

* Update API to support SHAP values

* Fix merge conflicts with updates in master

* Correct submodule hashes

* Fix variable sized stack allocation

* Make lint happy

* Add docs

* Fix typo

* Adjust tolerances

* Remove unneeded def

* Fixed cpp test setup

* Updated R API and cleaned up

* Fixed test typo
This commit is contained in:
Scott Lundberg
2017-10-12 12:35:51 -07:00
committed by GitHub
parent ff9180cd73
commit 78c4188cec
16 changed files with 369 additions and 143 deletions

View File

@@ -758,7 +758,8 @@ XGB_DLL int XGBoosterPredict(BoosterHandle handle,
(option_mask & 1) != 0,
&preds, ntree_limit,
(option_mask & 2) != 0,
(option_mask & 4) != 0);
(option_mask & 4) != 0,
(option_mask & 8) != 0);
*out_result = dmlc::BeginPtr(preds);
*len = static_cast<xgboost::bst_ulong>(preds.size());
API_END();

View File

@@ -224,7 +224,7 @@ class GBLinear : public GradientBooster {
void PredictContribution(DMatrix* p_fmat,
std::vector<bst_float>* out_contribs,
unsigned ntree_limit) override {
unsigned ntree_limit, bool approximate) override {
if (model.weight.size() == 0) {
model.InitModel();
}

View File

@@ -233,8 +233,8 @@ class GBTree : public GradientBooster {
void PredictContribution(DMatrix* p_fmat,
std::vector<bst_float>* out_contribs,
unsigned ntree_limit) override {
predictor->PredictContribution(p_fmat, out_contribs, model_, ntree_limit);
unsigned ntree_limit, bool approximate) override {
predictor->PredictContribution(p_fmat, out_contribs, model_, ntree_limit, approximate);
}
std::vector<std::string> DumpModel(const FeatureMap& fmap,

View File

@@ -433,9 +433,9 @@ class LearnerImpl : public Learner {
void Predict(DMatrix* data, bool output_margin,
std::vector<bst_float>* out_preds, unsigned ntree_limit,
bool pred_leaf, bool pred_contribs) const override {
bool pred_leaf, bool pred_contribs, bool approx_contribs) const override {
if (pred_contribs) {
gbm_->PredictContribution(data, out_preds, ntree_limit);
gbm_->PredictContribution(data, out_preds, ntree_limit, approx_contribs);
} else if (pred_leaf) {
gbm_->PredictLeaf(data, out_preds, ntree_limit);
} else {

View File

@@ -206,9 +206,9 @@ class CPUPredictor : public Predictor {
}
}
void PredictContribution(DMatrix* p_fmat,
std::vector<bst_float>* out_contribs,
const gbm::GBTreeModel& model, unsigned ntree_limit) override {
void PredictContribution(DMatrix* p_fmat, std::vector<bst_float>* out_contribs,
const gbm::GBTreeModel& model, unsigned ntree_limit,
bool approximate) override {
const int nthread = omp_get_max_threads();
InitThreadTemp(nthread, model.param.num_feature);
const MetaInfo& info = p_fmat->info();
@@ -225,10 +225,12 @@ class CPUPredictor : public Predictor {
// make sure contributions are zeroed; we could be reusing a previously
// allocated one
std::fill(contribs.begin(), contribs.end(), 0);
// initialize tree node mean values
#pragma omp parallel for schedule(static)
for (bst_omp_uint i = 0; i < ntree_limit; ++i) {
model.trees[i]->FillNodeMeanValues();
if (approximate) {
// initialize tree node mean values
#pragma omp parallel for schedule(static)
for (bst_omp_uint i = 0; i < ntree_limit; ++i) {
model.trees[i]->FillNodeMeanValues();
}
}
// start collecting the contributions
dmlc::DataIter<RowBatch>* iter = p_fmat->RowIterator();
@@ -253,7 +255,11 @@ class CPUPredictor : public Predictor {
if (model.tree_info[j] != gid) {
continue;
}
model.trees[j]->CalculateContributions(feats, root_id, p_contribs);
if (!approximate) {
model.trees[j]->CalculateContributions(feats, root_id, p_contribs);
} else {
model.trees[j]->CalculateContributionsApprox(feats, root_id, p_contribs);
}
}
feats.Drop(batch[i]);
// add base margin to BIAS

View File

@@ -384,9 +384,10 @@ class GPUPredictor : public xgboost::Predictor {
void PredictContribution(DMatrix* p_fmat,
std::vector<bst_float>* out_contribs,
const gbm::GBTreeModel& model,
unsigned ntree_limit) override {
unsigned ntree_limit,
bool approximate) override {
cpu_predictor->PredictContribution(p_fmat, out_contribs, model,
ntree_limit);
ntree_limit, approximate);
}
void Init(const std::vector<std::pair<std::string, std::string>>& cfg,