Add SHAP interaction effects, fix minor bug, and add cox loss (#3043)
* Add interaction effects and cox loss
* Minimize whitespace changes
* Cox loss now no longer needs a pre-sorted dataset.
* Address code review comments
* Remove mem check, rename to pred_interactions, include bias
* Make lint happy
* More lint fixes
* Fix cox loss indexing
* Fix main effects and tests
* Fix lint
* Use half interaction values on the off-diagonals
* Fix lint again
This commit is contained in:
committed by
Vadim Khotilovich
parent
077abb35cd
commit
d878c36c84
@@ -215,7 +215,9 @@ class CPUPredictor : public Predictor {
|
||||
|
||||
void PredictContribution(DMatrix* p_fmat, std::vector<bst_float>* out_contribs,
|
||||
const gbm::GBTreeModel& model, unsigned ntree_limit,
|
||||
bool approximate) override {
|
||||
bool approximate,
|
||||
int condition,
|
||||
unsigned condition_feature) override {
|
||||
const int nthread = omp_get_max_threads();
|
||||
InitThreadTemp(nthread, model.param.num_feature);
|
||||
const MetaInfo& info = p_fmat->info();
|
||||
@@ -232,12 +234,10 @@ class CPUPredictor : public Predictor {
|
||||
// make sure contributions is zeroed, we could be reusing a previously
|
||||
// allocated one
|
||||
std::fill(contribs.begin(), contribs.end(), 0);
|
||||
if (approximate) {
|
||||
// initialize tree node mean values
|
||||
#pragma omp parallel for schedule(static)
|
||||
for (bst_omp_uint i = 0; i < ntree_limit; ++i) {
|
||||
model.trees[i]->FillNodeMeanValues();
|
||||
}
|
||||
// initialize tree node mean values
|
||||
#pragma omp parallel for schedule(static)
|
||||
for (bst_omp_uint i = 0; i < ntree_limit; ++i) {
|
||||
model.trees[i]->FillNodeMeanValues();
|
||||
}
|
||||
// start collecting the contributions
|
||||
dmlc::DataIter<RowBatch>* iter = p_fmat->RowIterator();
|
||||
@@ -263,7 +263,8 @@ class CPUPredictor : public Predictor {
|
||||
continue;
|
||||
}
|
||||
if (!approximate) {
|
||||
model.trees[j]->CalculateContributions(feats, root_id, p_contribs);
|
||||
model.trees[j]->CalculateContributions(feats, root_id, p_contribs,
|
||||
condition, condition_feature);
|
||||
} else {
|
||||
model.trees[j]->CalculateContributionsApprox(feats, root_id, p_contribs);
|
||||
}
|
||||
@@ -279,6 +280,50 @@ class CPUPredictor : public Predictor {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Fills *out_contribs with a square matrix of contribution values for every
// (row, output group) pair.
//
// Output layout (flat, row-major):
//   [row j][group l][feature i][feature k]
// where each feature axis has (num_feature + 1) entries, so one (row, group)
// matrix holds (num_feature + 1)^2 values.
//
// For each feature index i, PredictContribution is evaluated with
// condition = 1 ("on") and condition = -1 ("off") for condition_feature = i.
// Off-diagonal entry (i, k) is half the difference between the two results
// for feature k. Diagonal entry (i, i) is the unconditioned contribution of
// feature i (condition = 0) minus the sum of the off-diagonal entries in
// row i.
//
// All index arithmetic is done in size_t: the flat offset grows like
// num_row * ngroup * (num_feature + 1)^2 and would wrap a 32-bit unsigned
// for moderately large inputs, silently writing to the wrong elements.
void PredictInteractionContributions(DMatrix* p_fmat, std::vector<bst_float>* out_contribs,
                                     const gbm::GBTreeModel& model, unsigned ntree_limit,
                                     bool approximate) override {
  const MetaInfo& info = p_fmat->info();
  const int ngroup = model.param.num_output_group;
  const size_t ncolumns = model.param.num_feature;
  // number of entries along one feature axis (one extra slot beyond the features)
  const size_t width = ncolumns + 1;
  // size of one (row, group) interaction matrix
  const size_t mrow_chunk = width * width;
  // size of all interaction matrices belonging to one row
  const size_t row_chunk = static_cast<size_t>(ngroup) * mrow_chunk;
  // size of all per-feature contribution vectors belonging to one row
  const size_t crow_chunk = static_cast<size_t>(ngroup) * width;

  // allocate space for (number of features^2) times the number of rows and tmp off/on contribs
  std::vector<bst_float>& contribs = *out_contribs;
  contribs.resize(info.num_row * row_chunk);
  std::vector<bst_float> contribs_off(info.num_row * crow_chunk);
  std::vector<bst_float> contribs_on(info.num_row * crow_chunk);
  std::vector<bst_float> contribs_diag(info.num_row * crow_chunk);

  // Compute the difference in effects when conditioning on each of the features on and off
  // see: Axiomatic characterizations of probabilistic and
  //      cardinal-probabilistic interaction indices
  PredictContribution(p_fmat, &contribs_diag, model, ntree_limit, approximate, 0, 0);
  for (size_t i = 0; i < width; ++i) {
    const unsigned condition_feature = static_cast<unsigned>(i);
    PredictContribution(p_fmat, &contribs_off, model, ntree_limit, approximate,
                        -1, condition_feature);
    PredictContribution(p_fmat, &contribs_on, model, ntree_limit, approximate,
                        1, condition_feature);

    for (size_t j = 0; j < info.num_row; ++j) {
      for (int l = 0; l < ngroup; ++l) {
        // start of row i inside the (j, l) interaction matrix
        const size_t o_offset = j * row_chunk + l * mrow_chunk + i * width;
        // start of the (j, l) per-feature contribution vector
        const size_t c_offset = j * crow_chunk + l * width;
        contribs[o_offset + i] = 0;
        for (size_t k = 0; k < width; ++k) {
          // fill in the diagonal with additive effects, and off-diagonal with the interactions
          if (k == i) {
            contribs[o_offset + i] += contribs_diag[c_offset + k];
          } else {
            contribs[o_offset + k] =
                (contribs_on[c_offset + k] - contribs_off[c_offset + k]) / 2.0;
            // what remains on the diagonal after removing the interactions
            contribs[o_offset + i] -= contribs[o_offset + k];
          }
        }
      }
    }
  }
}
|
||||
std::vector<RegTree::FVec> thread_temp;
|
||||
};
|
||||
|
||||
|
||||
Reference in New Issue
Block a user