Add SHAP interaction effects, fix minor bug, and add cox loss (#3043)
* Add interaction effects and cox loss * Minimize whitespace changes * Cox loss now no longer needs a pre-sorted dataset. * Address code review comments * Remove mem check, rename to pred_interactions, include bias * Make lint happy * More lint fixes * Fix cox loss indexing * Fix main effects and tests * Fix lint * Use half interaction values on the off-diagonals * Fix lint again
This commit is contained in:
committed by
Vadim Khotilovich
parent
077abb35cd
commit
d878c36c84
@@ -12,6 +12,7 @@
|
||||
#include <string>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
#include <numeric>
|
||||
#include "./base.h"
|
||||
|
||||
namespace xgboost {
|
||||
@@ -76,6 +77,19 @@ struct MetaInfo {
|
||||
inline unsigned GetRoot(size_t i) const {
|
||||
return root_index.size() != 0 ? root_index[i] : 0U;
|
||||
}
|
||||
/*! \brief get sorted indexes (argsort) of labels by absolute value (used by cox loss) */
|
||||
inline const std::vector<size_t>& LabelAbsSort() const {
|
||||
if (label_order_cache.size() == labels.size()) {
|
||||
return label_order_cache;
|
||||
}
|
||||
label_order_cache.resize(labels.size());
|
||||
std::iota(label_order_cache.begin(), label_order_cache.end(), 0);
|
||||
const auto l = labels;
|
||||
XGBOOST_PARALLEL_SORT(label_order_cache.begin(), label_order_cache.end(),
|
||||
[&l](size_t i1, size_t i2) {return std::abs(l[i1]) < std::abs(l[i2]);});
|
||||
|
||||
return label_order_cache;
|
||||
}
|
||||
/*! \brief clear all the information */
|
||||
void Clear();
|
||||
/*!
|
||||
@@ -96,6 +110,10 @@ struct MetaInfo {
|
||||
* \param num Number of elements in the source array.
|
||||
*/
|
||||
void SetInfo(const char* key, const void* dptr, DataType dtype, size_t num);
|
||||
|
||||
private:
|
||||
/*! \brief argsort of labels */
|
||||
mutable std::vector<size_t> label_order_cache;
|
||||
};
|
||||
|
||||
/*! \brief read-only sparse instance batch in CSR format */
|
||||
|
||||
@@ -124,10 +124,17 @@ class GradientBooster {
|
||||
* \param ntree_limit limit the number of trees used in prediction, when it equals 0, this means
|
||||
* we do not limit number of trees
|
||||
* \param approximate use a faster (inconsistent) approximation of SHAP values
|
||||
* \param condition condition on the condition_feature (0=no, -1=cond off, 1=cond on).
|
||||
* \param condition_feature feature to condition on (i.e. fix) during calculations
|
||||
*/
|
||||
virtual void PredictContribution(DMatrix* dmat,
|
||||
std::vector<bst_float>* out_contribs,
|
||||
unsigned ntree_limit = 0, bool approximate = false) = 0;
|
||||
unsigned ntree_limit = 0, bool approximate = false,
|
||||
int condition = 0, unsigned condition_feature = 0) = 0;
|
||||
|
||||
/*!
 * \brief pure virtual hook for predicting interaction contributions;
 *        the implementations are not visible from this declaration
 *
 * NOTE(review): presumably the feature-pair (SHAP interaction) counterpart
 * of PredictContribution -- confirm against the overriding classes.
 * Unlike PredictContribution, ntree_limit and approximate have no default
 * values here, so callers must pass both explicitly.
 *
 * \param dmat input feature matrix
 * \param out_contribs output vector receiving the contributions
 *        (layout not visible here -- TODO confirm)
 * \param ntree_limit presumably the same tree limit as in PredictContribution -- TODO confirm
 * \param approximate presumably selects the approximate algorithm, as in PredictContribution -- TODO confirm
 */
virtual void PredictInteractionContributions(DMatrix* dmat,
|
||||
std::vector<bst_float>* out_contribs,
|
||||
unsigned ntree_limit, bool approximate) = 0;
|
||||
|
||||
/*!
|
||||
* \brief dump the model in the requested format
|
||||
|
||||
@@ -105,6 +105,7 @@ class Learner : public rabit::Serializable {
|
||||
* \param pred_leaf whether to only predict the leaf index of each tree in a boosted tree predictor
|
||||
* \param pred_contribs whether to only predict the feature contributions
|
||||
* \param approx_contribs whether to approximate the feature contributions for speed
|
||||
* \param pred_interactions whether to compute the feature pair contributions
|
||||
*/
|
||||
virtual void Predict(DMatrix* data,
|
||||
bool output_margin,
|
||||
@@ -112,7 +113,9 @@ class Learner : public rabit::Serializable {
|
||||
unsigned ntree_limit = 0,
|
||||
bool pred_leaf = false,
|
||||
bool pred_contribs = false,
|
||||
bool approx_contribs = false) const = 0;
|
||||
bool approx_contribs = false,
|
||||
bool pred_interactions = false) const = 0;
|
||||
|
||||
/*!
|
||||
* \brief Set additional attribute to the Booster.
|
||||
* The property will be saved along the booster.
|
||||
|
||||
@@ -153,14 +153,24 @@ class Predictor {
|
||||
* a vector of length (nfeats + 1) * num_output_group * nsample, arranged in
|
||||
* that order.
|
||||
*
|
||||
* \param [in,out] dmat The input feature matrix.
|
||||
* \param [in,out] out_contribs The output feature contribs.
|
||||
* \param model Model to make predictions from.
|
||||
* \param ntree_limit (Optional) The ntree limit.
|
||||
* \param approximate Use fast approximate algorithm.
|
||||
* \param [in,out] dmat The input feature matrix.
|
||||
* \param [in,out] out_contribs The output feature contribs.
|
||||
* \param model Model to make predictions from.
|
||||
* \param ntree_limit (Optional) The ntree limit.
|
||||
* \param approximate Use fast approximate algorithm.
|
||||
* \param condition Condition on the condition_feature (0=no, -1=cond off, 1=cond on).
|
||||
* \param condition_feature Feature to condition on (i.e. fix) during calculations.
|
||||
*/
|
||||
|
||||
virtual void PredictContribution(DMatrix* dmat,
|
||||
std::vector<bst_float>* out_contribs,
|
||||
const gbm::GBTreeModel& model,
|
||||
unsigned ntree_limit = 0,
|
||||
bool approximate = false,
|
||||
int condition = 0,
|
||||
unsigned condition_feature = 0) = 0;
|
||||
|
||||
virtual void PredictInteractionContributions(DMatrix* dmat,
|
||||
std::vector<bst_float>* out_contribs,
|
||||
const gbm::GBTreeModel& model,
|
||||
unsigned ntree_limit = 0,
|
||||
|
||||
@@ -501,13 +501,33 @@ class RegTree: public TreeModel<bst_float, RTreeNodeStat> {
|
||||
* \param feat dense feature vector, if the feature is missing the field is set to NaN
|
||||
* \param root_id starting root index of the instance
|
||||
* \param out_contribs output vector to hold the contributions
|
||||
* \param condition fix one feature to off (-1) or on (1), or leave it unfixed (0, the default)
|
||||
* \param condition_feature the index of the feature to fix
|
||||
*/
|
||||
inline void CalculateContributions(const RegTree::FVec& feat, unsigned root_id,
|
||||
bst_float *out_contribs) const;
|
||||
bst_float *out_contribs,
|
||||
int condition = 0,
|
||||
unsigned condition_feature = 0) const;
|
||||
/*!
|
||||
* \brief Recursive function that computes the feature attributions for a single tree.
|
||||
* \param feat dense feature vector, if the feature is missing the field is set to NaN
|
||||
* \param phi dense output vector of feature attributions
|
||||
* \param node_index the index of the current node in the tree
|
||||
* \param unique_depth how many unique features are above the current node in the tree
|
||||
* \param parent_unique_path a vector of statistics about our current path through the tree
|
||||
* \param parent_zero_fraction what fraction of the parent path weight is coming as 0 (integrated)
|
||||
* \param parent_one_fraction what fraction of the parent path weight is coming as 1 (fixed)
|
||||
* \param parent_feature_index what feature the parent node used to split
|
||||
* \param condition fix one feature to off (-1) or on (1), or leave it unfixed (0, the default)
|
||||
* \param condition_feature the index of the feature to fix
|
||||
* \param condition_fraction what fraction of the current weight matches our conditioning feature
|
||||
*/
|
||||
inline void TreeShap(const RegTree::FVec& feat, bst_float *phi,
|
||||
unsigned node_index, unsigned unique_depth,
|
||||
PathElement *parent_unique_path, bst_float parent_zero_fraction,
|
||||
bst_float parent_one_fraction, int parent_feature_index) const;
|
||||
bst_float parent_one_fraction, int parent_feature_index,
|
||||
int condition, unsigned condition_feature,
|
||||
bst_float condition_fraction) const;
|
||||
|
||||
/*!
|
||||
* \brief calculate the approximate feature contributions for the given root
|
||||
@@ -700,7 +720,7 @@ inline bst_float UnwoundPathSum(const PathElement *unique_path, unsigned unique_
|
||||
/ static_cast<bst_float>((i + 1) * one_fraction);
|
||||
total += tmp;
|
||||
next_one_portion = unique_path[i].pweight - tmp * zero_fraction * ((unique_depth - i)
|
||||
/ static_cast<bst_float>(unique_depth+1));
|
||||
/ static_cast<bst_float>(unique_depth + 1));
|
||||
} else {
|
||||
total += (unique_path[i].pweight / zero_fraction) / ((unique_depth - i)
|
||||
/ static_cast<bst_float>(unique_depth + 1));
|
||||
@@ -713,15 +733,22 @@ inline bst_float UnwoundPathSum(const PathElement *unique_path, unsigned unique_
|
||||
inline void RegTree::TreeShap(const RegTree::FVec& feat, bst_float *phi,
|
||||
unsigned node_index, unsigned unique_depth,
|
||||
PathElement *parent_unique_path, bst_float parent_zero_fraction,
|
||||
bst_float parent_one_fraction, int parent_feature_index) const {
|
||||
bst_float parent_one_fraction, int parent_feature_index,
|
||||
int condition, unsigned condition_feature,
|
||||
bst_float condition_fraction) const {
|
||||
const auto node = (*this)[node_index];
|
||||
|
||||
// stop if we have no weight coming down to us
|
||||
if (condition_fraction == 0) return;
|
||||
|
||||
// extend the unique path
|
||||
PathElement *unique_path = parent_unique_path + unique_depth;
|
||||
if (unique_depth > 0) std::copy(parent_unique_path,
|
||||
parent_unique_path + unique_depth, unique_path);
|
||||
ExtendPath(unique_path, unique_depth, parent_zero_fraction,
|
||||
parent_one_fraction, parent_feature_index);
|
||||
PathElement *unique_path = parent_unique_path + unique_depth + 1;
|
||||
std::copy(parent_unique_path, parent_unique_path + unique_depth + 1, unique_path);
|
||||
|
||||
if (condition == 0 || condition_feature != static_cast<unsigned>(parent_feature_index)) {
|
||||
ExtendPath(unique_path, unique_depth, parent_zero_fraction,
|
||||
parent_one_fraction, parent_feature_index);
|
||||
}
|
||||
const unsigned split_index = node.split_index();
|
||||
|
||||
// leaf node
|
||||
@@ -729,7 +756,8 @@ inline void RegTree::TreeShap(const RegTree::FVec& feat, bst_float *phi,
|
||||
for (unsigned i = 1; i <= unique_depth; ++i) {
|
||||
const bst_float w = UnwoundPathSum(unique_path, unique_depth, i);
|
||||
const PathElement &el = unique_path[i];
|
||||
phi[el.feature_index] += w * (el.one_fraction - el.zero_fraction) * node.leaf_value();
|
||||
phi[el.feature_index] += w * (el.one_fraction - el.zero_fraction)
|
||||
* node.leaf_value() * condition_fraction;
|
||||
}
|
||||
|
||||
// internal node
|
||||
@@ -764,34 +792,44 @@ inline void RegTree::TreeShap(const RegTree::FVec& feat, bst_float *phi,
|
||||
unique_depth -= 1;
|
||||
}
|
||||
|
||||
// divide up the condition_fraction among the recursive calls
|
||||
bst_float hot_condition_fraction = condition_fraction;
|
||||
bst_float cold_condition_fraction = condition_fraction;
|
||||
if (condition > 0 && split_index == condition_feature) {
|
||||
cold_condition_fraction = 0;
|
||||
unique_depth -= 1;
|
||||
} else if (condition < 0 && split_index == condition_feature) {
|
||||
hot_condition_fraction *= hot_zero_fraction;
|
||||
cold_condition_fraction *= cold_zero_fraction;
|
||||
unique_depth -= 1;
|
||||
}
|
||||
|
||||
TreeShap(feat, phi, hot_index, unique_depth + 1, unique_path,
|
||||
hot_zero_fraction*incoming_zero_fraction, incoming_one_fraction, split_index);
|
||||
hot_zero_fraction * incoming_zero_fraction, incoming_one_fraction,
|
||||
split_index, condition, condition_feature, hot_condition_fraction);
|
||||
|
||||
TreeShap(feat, phi, cold_index, unique_depth + 1, unique_path,
|
||||
cold_zero_fraction*incoming_zero_fraction, 0, split_index);
|
||||
cold_zero_fraction * incoming_zero_fraction, 0,
|
||||
split_index, condition, condition_feature, cold_condition_fraction);
|
||||
}
|
||||
}
|
||||
|
||||
inline void RegTree::CalculateContributions(const RegTree::FVec& feat, unsigned root_id,
|
||||
bst_float *out_contribs) const {
|
||||
bst_float *out_contribs,
|
||||
int condition,
|
||||
unsigned condition_feature) const {
|
||||
// find the expected value of the tree's predictions
|
||||
bst_float base_value = 0.0f;
|
||||
bst_float total_cover = 0.0f;
|
||||
for (int i = 0; i < (*this).param.num_nodes; ++i) {
|
||||
const auto node = (*this)[i];
|
||||
if (node.is_leaf()) {
|
||||
const auto cover = this->stat(i).sum_hess;
|
||||
base_value += cover * node.leaf_value();
|
||||
total_cover += cover;
|
||||
}
|
||||
if (condition == 0) {
|
||||
bst_float node_value = this->node_mean_values[static_cast<int>(root_id)];
|
||||
out_contribs[feat.size()] += node_value;
|
||||
}
|
||||
out_contribs[feat.size()] += base_value / total_cover;
|
||||
|
||||
// Preallocate space for the unique path data
|
||||
const int maxd = this->MaxDepth(root_id) + 1;
|
||||
const int maxd = this->MaxDepth(root_id) + 2;
|
||||
PathElement *unique_path_data = new PathElement[(maxd * (maxd + 1)) / 2];
|
||||
|
||||
TreeShap(feat, out_contribs, root_id, 0, unique_path_data, 1, 1, -1);
|
||||
TreeShap(feat, out_contribs, root_id, 0, unique_path_data,
|
||||
1, 1, -1, condition, condition_feature, 1);
|
||||
delete[] unique_path_data;
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user