SHAP values for feature contributions (#2438)
* SHAP values for feature contributions
* Fix commenting error
* New polynomial time SHAP value estimation algorithm
* Update API to support SHAP values
* Fix merge conflicts with updates in master
* Correct submodule hashes
* Fix variable sized stack allocation
* Make lint happy
* Add docs
* Fix typo
* Adjust tolerances
* Remove unneeded def
* Fixed cpp test setup
* Updated R API and cleaned up
* Fixed test typo
This commit is contained in:
@@ -758,7 +758,8 @@ XGB_DLL int XGBoosterPredict(BoosterHandle handle,
|
||||
(option_mask & 1) != 0,
|
||||
&preds, ntree_limit,
|
||||
(option_mask & 2) != 0,
|
||||
(option_mask & 4) != 0);
|
||||
(option_mask & 4) != 0,
|
||||
(option_mask & 8) != 0);
|
||||
*out_result = dmlc::BeginPtr(preds);
|
||||
*len = static_cast<xgboost::bst_ulong>(preds.size());
|
||||
API_END();
|
||||
|
||||
@@ -224,7 +224,7 @@ class GBLinear : public GradientBooster {
|
||||
|
||||
void PredictContribution(DMatrix* p_fmat,
|
||||
std::vector<bst_float>* out_contribs,
|
||||
unsigned ntree_limit) override {
|
||||
unsigned ntree_limit, bool approximate) override {
|
||||
if (model.weight.size() == 0) {
|
||||
model.InitModel();
|
||||
}
|
||||
|
||||
@@ -233,8 +233,8 @@ class GBTree : public GradientBooster {
|
||||
|
||||
void PredictContribution(DMatrix* p_fmat,
|
||||
std::vector<bst_float>* out_contribs,
|
||||
unsigned ntree_limit) override {
|
||||
predictor->PredictContribution(p_fmat, out_contribs, model_, ntree_limit);
|
||||
unsigned ntree_limit, bool approximate) override {
|
||||
predictor->PredictContribution(p_fmat, out_contribs, model_, ntree_limit, approximate);
|
||||
}
|
||||
|
||||
std::vector<std::string> DumpModel(const FeatureMap& fmap,
|
||||
|
||||
@@ -433,9 +433,9 @@ class LearnerImpl : public Learner {
|
||||
|
||||
void Predict(DMatrix* data, bool output_margin,
|
||||
std::vector<bst_float>* out_preds, unsigned ntree_limit,
|
||||
bool pred_leaf, bool pred_contribs) const override {
|
||||
bool pred_leaf, bool pred_contribs, bool approx_contribs) const override {
|
||||
if (pred_contribs) {
|
||||
gbm_->PredictContribution(data, out_preds, ntree_limit);
|
||||
gbm_->PredictContribution(data, out_preds, ntree_limit, approx_contribs);
|
||||
} else if (pred_leaf) {
|
||||
gbm_->PredictLeaf(data, out_preds, ntree_limit);
|
||||
} else {
|
||||
|
||||
@@ -206,9 +206,9 @@ class CPUPredictor : public Predictor {
|
||||
}
|
||||
}
|
||||
|
||||
void PredictContribution(DMatrix* p_fmat,
|
||||
std::vector<bst_float>* out_contribs,
|
||||
const gbm::GBTreeModel& model, unsigned ntree_limit) override {
|
||||
void PredictContribution(DMatrix* p_fmat, std::vector<bst_float>* out_contribs,
|
||||
const gbm::GBTreeModel& model, unsigned ntree_limit,
|
||||
bool approximate) override {
|
||||
const int nthread = omp_get_max_threads();
|
||||
InitThreadTemp(nthread, model.param.num_feature);
|
||||
const MetaInfo& info = p_fmat->info();
|
||||
@@ -225,10 +225,12 @@ class CPUPredictor : public Predictor {
|
||||
// make sure contributions is zeroed, we could be reusing a previously
|
||||
// allocated one
|
||||
std::fill(contribs.begin(), contribs.end(), 0);
|
||||
// initialize tree node mean values
|
||||
#pragma omp parallel for schedule(static)
|
||||
for (bst_omp_uint i = 0; i < ntree_limit; ++i) {
|
||||
model.trees[i]->FillNodeMeanValues();
|
||||
if (approximate) {
|
||||
// initialize tree node mean values
|
||||
#pragma omp parallel for schedule(static)
|
||||
for (bst_omp_uint i = 0; i < ntree_limit; ++i) {
|
||||
model.trees[i]->FillNodeMeanValues();
|
||||
}
|
||||
}
|
||||
// start collecting the contributions
|
||||
dmlc::DataIter<RowBatch>* iter = p_fmat->RowIterator();
|
||||
@@ -253,7 +255,11 @@ class CPUPredictor : public Predictor {
|
||||
if (model.tree_info[j] != gid) {
|
||||
continue;
|
||||
}
|
||||
model.trees[j]->CalculateContributions(feats, root_id, p_contribs);
|
||||
if (!approximate) {
|
||||
model.trees[j]->CalculateContributions(feats, root_id, p_contribs);
|
||||
} else {
|
||||
model.trees[j]->CalculateContributionsApprox(feats, root_id, p_contribs);
|
||||
}
|
||||
}
|
||||
feats.Drop(batch[i]);
|
||||
// add base margin to BIAS
|
||||
|
||||
@@ -384,9 +384,10 @@ class GPUPredictor : public xgboost::Predictor {
|
||||
void PredictContribution(DMatrix* p_fmat,
|
||||
std::vector<bst_float>* out_contribs,
|
||||
const gbm::GBTreeModel& model,
|
||||
unsigned ntree_limit) override {
|
||||
unsigned ntree_limit,
|
||||
bool approximate) override {
|
||||
cpu_predictor->PredictContribution(p_fmat, out_contribs, model,
|
||||
ntree_limit);
|
||||
ntree_limit, approximate);
|
||||
}
|
||||
|
||||
void Init(const std::vector<std::pair<std::string, std::string>>& cfg,
|
||||
|
||||
Reference in New Issue
Block a user