Add SHAP interaction effects, fix minor bug, and add cox loss (#3043)

* Add interaction effects and cox loss

* Minimize whitespace changes

* Cox loss now no longer needs a pre-sorted dataset.

* Address code review comments

* Remove mem check, rename to pred_interactions, include bias

* Make lint happy

* More lint fixes

* Fix cox loss indexing

* Fix main effects and tests

* Fix lint

* Use half interaction values on the off-diagonals

* Fix lint again
This commit is contained in:
Scott Lundberg
2018-02-07 18:38:01 -08:00
committed by Vadim Khotilovich
parent 077abb35cd
commit d878c36c84
19 changed files with 638 additions and 125 deletions

View File

@@ -992,7 +992,7 @@ class Booster(object):
return self.eval_set([(data, name)], iteration)
def predict(self, data, output_margin=False, ntree_limit=0, pred_leaf=False,
pred_contribs=False, approx_contribs=False):
pred_contribs=False, approx_contribs=False, pred_interactions=False):
"""
Predict with data.
@@ -1019,14 +1019,21 @@ class Booster(object):
in both tree 1 and tree 0.
pred_contribs : bool
When this option is on, the output will be a matrix of (nsample, nfeats+1)
When this is True the output will be a matrix of size (nsample, nfeats + 1)
with each record indicating the feature contributions (SHAP values) for that
prediction. The sum of all feature contributions is equal to the prediction.
Note that the bias is added as the final column, on top of the regular features.
prediction. The sum of all feature contributions is equal to the raw untransformed
margin value of the prediction. Note the final column is the bias term.
approx_contribs : bool
Approximate the contributions of each feature
pred_interactions : bool
When this is True the output will be a matrix of size (nsample, nfeats + 1, nfeats + 1)
indicating the SHAP interaction values for each pair of features. The sum of each
row (or column) of the interaction values equals the corresponding SHAP value (from
pred_contribs), and the sum of the entire matrix equals the raw untransformed margin
value of the prediction. Note the last row and column correspond to the bias term.
Returns
-------
prediction : numpy array
@@ -1040,6 +1047,8 @@ class Booster(object):
option_mask |= 0x04
if approx_contribs:
option_mask |= 0x08
if pred_interactions:
option_mask |= 0x10
self._validate_features(data)
@@ -1055,8 +1064,22 @@ class Booster(object):
preds = preds.astype(np.int32)
nrow = data.num_row()
if preds.size != nrow and preds.size % nrow == 0:
ncol = int(preds.size / nrow)
preds = preds.reshape(nrow, ncol)
chunk_size = int(preds.size / nrow)
if pred_interactions:
ngroup = int(chunk_size / ((data.num_col() + 1) * (data.num_col() + 1)))
if ngroup == 1:
preds = preds.reshape(nrow, data.num_col() + 1, data.num_col() + 1)
else:
preds = preds.reshape(nrow, ngroup, data.num_col() + 1, data.num_col() + 1)
elif pred_contribs:
ngroup = int(chunk_size / (data.num_col() + 1))
if ngroup == 1:
preds = preds.reshape(nrow, data.num_col() + 1)
else:
preds = preds.reshape(nrow, ngroup, data.num_col() + 1)
else:
preds = preds.reshape(nrow, chunk_size)
return preds
def save_model(self, fname):