Add SHAP interaction effects, fix minor bug, and add cox loss (#3043)
* Add interaction effects and cox loss * Minimize whitespace changes * Cox loss now no longer needs a pre-sorted dataset. * Address code review comments * Remove mem check, rename to pred_interactions, include bias * Make lint happy * More lint fixes * Fix cox loss indexing * Fix main effects and tests * Fix lint * Use half interaction values on the off-diagonals * Fix lint again
This commit is contained in:
committed by
Vadim Khotilovich
parent
077abb35cd
commit
d878c36c84
@@ -992,7 +992,7 @@ class Booster(object):
|
||||
return self.eval_set([(data, name)], iteration)
|
||||
|
||||
def predict(self, data, output_margin=False, ntree_limit=0, pred_leaf=False,
|
||||
pred_contribs=False, approx_contribs=False):
|
||||
pred_contribs=False, approx_contribs=False, pred_interactions=False):
|
||||
"""
|
||||
Predict with data.
|
||||
|
||||
@@ -1019,14 +1019,21 @@ class Booster(object):
|
||||
in both tree 1 and tree 0.
|
||||
|
||||
pred_contribs : bool
|
||||
When this option is on, the output will be a matrix of (nsample, nfeats+1)
|
||||
When this is True the output will be a matrix of size (nsample, nfeats + 1)
|
||||
with each record indicating the feature contributions (SHAP values) for that
|
||||
prediction. The sum of all feature contributions is equal to the prediction.
|
||||
Note that the bias is added as the final column, on top of the regular features.
|
||||
prediction. The sum of all feature contributions is equal to the raw untransformed
|
||||
margin value of the prediction. Note the final column is the bias term.
|
||||
|
||||
approx_contribs : bool
|
||||
Approximate the contributions of each feature
|
||||
|
||||
pred_interactions : bool
|
||||
When this is True the output will be a matrix of size (nsample, nfeats + 1, nfeats + 1)
|
||||
indicating the SHAP interaction values for each pair of features. The sum of each
|
||||
row (or column) of the interaction values equals the corresponding SHAP value (from
|
||||
pred_contribs), and the sum of the entire matrix equals the raw untransformed margin
|
||||
value of the prediction. Note the last row and column correspond to the bias term.
|
||||
|
||||
Returns
|
||||
-------
|
||||
prediction : numpy array
|
||||
@@ -1040,6 +1047,8 @@ class Booster(object):
|
||||
option_mask |= 0x04
|
||||
if approx_contribs:
|
||||
option_mask |= 0x08
|
||||
if pred_interactions:
|
||||
option_mask |= 0x10
|
||||
|
||||
self._validate_features(data)
|
||||
|
||||
@@ -1055,8 +1064,22 @@ class Booster(object):
|
||||
preds = preds.astype(np.int32)
|
||||
nrow = data.num_row()
|
||||
if preds.size != nrow and preds.size % nrow == 0:
|
||||
ncol = int(preds.size / nrow)
|
||||
preds = preds.reshape(nrow, ncol)
|
||||
chunk_size = int(preds.size / nrow)
|
||||
|
||||
if pred_interactions:
|
||||
ngroup = int(chunk_size / ((data.num_col() + 1) * (data.num_col() + 1)))
|
||||
if ngroup == 1:
|
||||
preds = preds.reshape(nrow, data.num_col() + 1, data.num_col() + 1)
|
||||
else:
|
||||
preds = preds.reshape(nrow, ngroup, data.num_col() + 1, data.num_col() + 1)
|
||||
elif pred_contribs:
|
||||
ngroup = int(chunk_size / (data.num_col() + 1))
|
||||
if ngroup == 1:
|
||||
preds = preds.reshape(nrow, data.num_col() + 1)
|
||||
else:
|
||||
preds = preds.reshape(nrow, ngroup, data.num_col() + 1)
|
||||
else:
|
||||
preds = preds.reshape(nrow, chunk_size)
|
||||
return preds
|
||||
|
||||
def save_model(self, fname):
|
||||
|
||||
Reference in New Issue
Block a user