Add SHAP interaction effects, fix minor bug, and add cox loss (#3043)

* Add interaction effects and cox loss

* Minimize whitespace changes

* Cox loss now no longer needs a pre-sorted dataset.

* Address code review comments

* Remove mem check, rename to pred_interactions, include bias

* Make lint happy

* More lint fixes

* Fix cox loss indexing

* Fix main effects and tests

* Fix lint

* Use half interaction values on the off-diagonals

* Fix lint again
This commit is contained in:
Scott Lundberg
2018-02-07 18:38:01 -08:00
committed by Vadim Khotilovich
parent 077abb35cd
commit d878c36c84
19 changed files with 638 additions and 125 deletions

View File

@@ -12,6 +12,7 @@
#include <string>
#include <memory>
#include <vector>
#include <numeric>
#include "./base.h"
namespace xgboost {
@@ -76,6 +77,19 @@ struct MetaInfo {
inline unsigned GetRoot(size_t i) const {
return root_index.size() != 0 ? root_index[i] : 0U;
}
/*!
 * \brief get sorted indexes (argsort) of labels by absolute value (used by cox loss)
 *
 * The permutation is stored in label_order_cache and reused by later calls.
 * NOTE: the cache is considered valid whenever its size equals labels.size(),
 * so changing label values without changing their count does not trigger a re-sort.
 * \return reference to the cached index permutation, ordered by the comparator
 *         abs(labels[i1]) < abs(labels[i2])
 */
inline const std::vector<size_t>& LabelAbsSort() const {
  // fast path: a cache of matching length is reused as-is
  if (label_order_cache.size() == labels.size()) {
    return label_order_cache;
  }
  // start from the identity permutation 0, 1, ..., n-1
  label_order_cache.resize(labels.size());
  std::iota(label_order_cache.begin(), label_order_cache.end(), 0);
  // bind by reference: the comparator only reads the labels, so copying the
  // whole container for every re-sort is wasted work
  const auto& l = labels;
  XGBOOST_PARALLEL_SORT(label_order_cache.begin(), label_order_cache.end(),
                        [&l](size_t i1, size_t i2) {
                          return std::abs(l[i1]) < std::abs(l[i2]);
                        });
  return label_order_cache;
}
/*! \brief clear all the information */
void Clear();
/*!
@@ -96,6 +110,10 @@ struct MetaInfo {
* \param num Number of elements in the source array.
*/
void SetInfo(const char* key, const void* dptr, DataType dtype, size_t num);
private:
/*! \brief argsort of labels */
mutable std::vector<size_t> label_order_cache;
};
/*! \brief read-only sparse instance batch in CSR format */

View File

@@ -124,10 +124,17 @@ class GradientBooster {
* \param ntree_limit limit the number of trees used in prediction, when it equals 0, this means
* we do not limit number of trees
* \param approximate use a faster (inconsistent) approximation of SHAP values
* \param condition condition on the condition_feature (0=no, -1=cond off, 1=cond on).
* \param condition_feature feature to condition on (i.e. fix) during calculations
*/
virtual void PredictContribution(DMatrix* dmat,
std::vector<bst_float>* out_contribs,
unsigned ntree_limit = 0, bool approximate = false) = 0;
unsigned ntree_limit = 0, bool approximate = false,
int condition = 0, unsigned condition_feature = 0) = 0;
virtual void PredictInteractionContributions(DMatrix* dmat,
std::vector<bst_float>* out_contribs,
unsigned ntree_limit, bool approximate) = 0;
/*!
* \brief dump the model in the requested format

View File

@@ -105,6 +105,7 @@ class Learner : public rabit::Serializable {
* \param pred_leaf whether to only predict the leaf index of each tree in a boosted tree predictor
* \param pred_contribs whether to only predict the feature contributions
* \param approx_contribs whether to approximate the feature contributions for speed
* \param pred_interactions whether to compute the feature pair contributions
*/
virtual void Predict(DMatrix* data,
bool output_margin,
@@ -112,7 +113,9 @@ class Learner : public rabit::Serializable {
unsigned ntree_limit = 0,
bool pred_leaf = false,
bool pred_contribs = false,
bool approx_contribs = false) const = 0;
bool approx_contribs = false,
bool pred_interactions = false) const = 0;
/*!
* \brief Set additional attribute to the Booster.
* The property will be saved along the booster.

View File

@@ -153,14 +153,24 @@ class Predictor {
* a vector of length (nfeats + 1) * num_output_group * nsample, arranged in
* that order.
*
* \param [in,out] dmat The input feature matrix.
* \param [in,out] out_contribs The output feature contribs.
* \param model Model to make predictions from.
* \param ntree_limit (Optional) The ntree limit.
* \param approximate Use fast approximate algorithm.
* \param [in,out] dmat The input feature matrix.
* \param [in,out] out_contribs The output feature contribs.
* \param model Model to make predictions from.
* \param ntree_limit (Optional) The ntree limit.
* \param approximate Use fast approximate algorithm.
* \param condition Condition on the condition_feature (0=no, -1=cond off, 1=cond on).
* \param condition_feature Feature to condition on (i.e. fix) during calculations.
*/
virtual void PredictContribution(DMatrix* dmat,
std::vector<bst_float>* out_contribs,
const gbm::GBTreeModel& model,
unsigned ntree_limit = 0,
bool approximate = false,
int condition = 0,
unsigned condition_feature = 0) = 0;
virtual void PredictInteractionContributions(DMatrix* dmat,
std::vector<bst_float>* out_contribs,
const gbm::GBTreeModel& model,
unsigned ntree_limit = 0,

View File

@@ -501,13 +501,33 @@ class RegTree: public TreeModel<bst_float, RTreeNodeStat> {
* \param feat dense feature vector, if the feature is missing the field is set to NaN
* \param root_id starting root index of the instance
* \param out_contribs output vector to hold the contributions
* \param condition fix one feature to either off (-1), on (1), or not fixed (0, the default)
* \param condition_feature the index of the feature to fix
*/
inline void CalculateContributions(const RegTree::FVec& feat, unsigned root_id,
bst_float *out_contribs) const;
bst_float *out_contribs,
int condition = 0,
unsigned condition_feature = 0) const;
/*!
* \brief Recursive function that computes the feature attributions for a single tree.
* \param feat dense feature vector, if the feature is missing the field is set to NaN
* \param phi dense output vector of feature attributions
* \param node_index the index of the current node in the tree
* \param unique_depth how many unique features are above the current node in the tree
* \param parent_unique_path a vector of statistics about our current path through the tree
* \param parent_zero_fraction what fraction of the parent path weight is coming as 0 (integrated)
* \param parent_one_fraction what fraction of the parent path weight is coming as 1 (fixed)
* \param parent_feature_index what feature the parent node used to split
* \param condition fix one feature to either off (-1), on (1), or not fixed (0, the default)
* \param condition_feature the index of the feature to fix
* \param condition_fraction what fraction of the current weight matches our conditioning feature
*/
inline void TreeShap(const RegTree::FVec& feat, bst_float *phi,
unsigned node_index, unsigned unique_depth,
PathElement *parent_unique_path, bst_float parent_zero_fraction,
bst_float parent_one_fraction, int parent_feature_index) const;
bst_float parent_one_fraction, int parent_feature_index,
int condition, unsigned condition_feature,
bst_float condition_fraction) const;
/*!
* \brief calculate the approximate feature contributions for the given root
@@ -700,7 +720,7 @@ inline bst_float UnwoundPathSum(const PathElement *unique_path, unsigned unique_
/ static_cast<bst_float>((i + 1) * one_fraction);
total += tmp;
next_one_portion = unique_path[i].pweight - tmp * zero_fraction * ((unique_depth - i)
/ static_cast<bst_float>(unique_depth+1));
/ static_cast<bst_float>(unique_depth + 1));
} else {
total += (unique_path[i].pweight / zero_fraction) / ((unique_depth - i)
/ static_cast<bst_float>(unique_depth + 1));
@@ -713,15 +733,22 @@ inline bst_float UnwoundPathSum(const PathElement *unique_path, unsigned unique_
inline void RegTree::TreeShap(const RegTree::FVec& feat, bst_float *phi,
unsigned node_index, unsigned unique_depth,
PathElement *parent_unique_path, bst_float parent_zero_fraction,
bst_float parent_one_fraction, int parent_feature_index) const {
bst_float parent_one_fraction, int parent_feature_index,
int condition, unsigned condition_feature,
bst_float condition_fraction) const {
const auto node = (*this)[node_index];
// stop if we have no weight coming down to us
if (condition_fraction == 0) return;
// extend the unique path
PathElement *unique_path = parent_unique_path + unique_depth;
if (unique_depth > 0) std::copy(parent_unique_path,
parent_unique_path + unique_depth, unique_path);
ExtendPath(unique_path, unique_depth, parent_zero_fraction,
parent_one_fraction, parent_feature_index);
PathElement *unique_path = parent_unique_path + unique_depth + 1;
std::copy(parent_unique_path, parent_unique_path + unique_depth + 1, unique_path);
if (condition == 0 || condition_feature != static_cast<unsigned>(parent_feature_index)) {
ExtendPath(unique_path, unique_depth, parent_zero_fraction,
parent_one_fraction, parent_feature_index);
}
const unsigned split_index = node.split_index();
// leaf node
@@ -729,7 +756,8 @@ inline void RegTree::TreeShap(const RegTree::FVec& feat, bst_float *phi,
for (unsigned i = 1; i <= unique_depth; ++i) {
const bst_float w = UnwoundPathSum(unique_path, unique_depth, i);
const PathElement &el = unique_path[i];
phi[el.feature_index] += w * (el.one_fraction - el.zero_fraction) * node.leaf_value();
phi[el.feature_index] += w * (el.one_fraction - el.zero_fraction)
* node.leaf_value() * condition_fraction;
}
// internal node
@@ -764,34 +792,44 @@ inline void RegTree::TreeShap(const RegTree::FVec& feat, bst_float *phi,
unique_depth -= 1;
}
// divide up the condition_fraction among the recursive calls
bst_float hot_condition_fraction = condition_fraction;
bst_float cold_condition_fraction = condition_fraction;
if (condition > 0 && split_index == condition_feature) {
cold_condition_fraction = 0;
unique_depth -= 1;
} else if (condition < 0 && split_index == condition_feature) {
hot_condition_fraction *= hot_zero_fraction;
cold_condition_fraction *= cold_zero_fraction;
unique_depth -= 1;
}
TreeShap(feat, phi, hot_index, unique_depth + 1, unique_path,
hot_zero_fraction*incoming_zero_fraction, incoming_one_fraction, split_index);
hot_zero_fraction * incoming_zero_fraction, incoming_one_fraction,
split_index, condition, condition_feature, hot_condition_fraction);
TreeShap(feat, phi, cold_index, unique_depth + 1, unique_path,
cold_zero_fraction*incoming_zero_fraction, 0, split_index);
cold_zero_fraction * incoming_zero_fraction, 0,
split_index, condition, condition_feature, cold_condition_fraction);
}
}
inline void RegTree::CalculateContributions(const RegTree::FVec& feat, unsigned root_id,
bst_float *out_contribs) const {
bst_float *out_contribs,
int condition,
unsigned condition_feature) const {
// find the expected value of the tree's predictions
bst_float base_value = 0.0f;
bst_float total_cover = 0.0f;
for (int i = 0; i < (*this).param.num_nodes; ++i) {
const auto node = (*this)[i];
if (node.is_leaf()) {
const auto cover = this->stat(i).sum_hess;
base_value += cover * node.leaf_value();
total_cover += cover;
}
if (condition == 0) {
bst_float node_value = this->node_mean_values[static_cast<int>(root_id)];
out_contribs[feat.size()] += node_value;
}
out_contribs[feat.size()] += base_value / total_cover;
// Preallocate space for the unique path data
const int maxd = this->MaxDepth(root_id) + 1;
const int maxd = this->MaxDepth(root_id) + 2;
PathElement *unique_path_data = new PathElement[(maxd * (maxd + 1)) / 2];
TreeShap(feat, out_contribs, root_id, 0, unique_path_data, 1, 1, -1);
TreeShap(feat, out_contribs, root_id, 0, unique_path_data,
1, 1, -1, condition, condition_feature, 1);
delete[] unique_path_data;
}