[R] maintenance Nov 2017; SHAP plots (#2888)
* [R] fix predict contributions for data with no colnames
* [R] add a render parameter for xgb.plot.multi.trees; fixes #2628
* [R] update Rd's
* [R] remove unnecessary dep-package from R cmake install
* silence type warnings; readability
* [R] silence complaint about incomplete line at the end
* [R] initial version of xgb.plot.shap()
* [R] more work on xgb.plot.shap
* [R] enforce black font in xgb.plot.tree; fixes #2640
* [R] if feature names are available, check in predict that they are the same; fixes #2857
* [R] cran check and lint fixes
* remove tabs
* [R] add references; a test for plot.shap
This commit is contained in:
committed by
Tong He
parent
1b77903eeb
commit
e8a6597957
@@ -516,7 +516,7 @@ class RegTree: public TreeModel<bst_float, RTreeNodeStat> {
|
||||
* \param out_contribs output vector to hold the contributions
|
||||
*/
|
||||
inline void CalculateContributionsApprox(const RegTree::FVec& feat, unsigned root_id,
|
||||
bst_float *out_contribs) const;
|
||||
bst_float *out_contribs) const;
|
||||
/*!
|
||||
* \brief get next position of the tree given current pid
|
||||
* \param pid Current node id.
|
||||
@@ -619,7 +619,7 @@ inline bst_float RegTree::FillNodeMeanValue(int nid) {
|
||||
}
|
||||
|
||||
inline void RegTree::CalculateContributionsApprox(const RegTree::FVec& feat, unsigned root_id,
|
||||
bst_float *out_contribs) const {
|
||||
bst_float *out_contribs) const {
|
||||
CHECK_GT(this->node_mean_values.size(), 0U);
|
||||
// this follows the idea of http://blog.datadive.net/interpreting-random-forests/
|
||||
bst_float node_value;
|
||||
@@ -647,16 +647,16 @@ inline void RegTree::CalculateContributionsApprox(const RegTree::FVec& feat, uns
|
||||
|
||||
// extend our decision path with a fraction of one and zero extensions
|
||||
inline void ExtendPath(PathElement *unique_path, unsigned unique_depth,
|
||||
bst_float zero_fraction, bst_float one_fraction, int feature_index) {
|
||||
bst_float zero_fraction, bst_float one_fraction, int feature_index) {
|
||||
unique_path[unique_depth].feature_index = feature_index;
|
||||
unique_path[unique_depth].zero_fraction = zero_fraction;
|
||||
unique_path[unique_depth].one_fraction = one_fraction;
|
||||
unique_path[unique_depth].pweight = static_cast<bst_float>(unique_depth == 0 ? 1 : 0);
|
||||
for (int i = unique_depth-1; i >= 0; i--) {
|
||||
unique_path[i+1].pweight += one_fraction*unique_path[i].pweight*(i+1)
|
||||
/ static_cast<bst_float>(unique_depth+1);
|
||||
unique_path[i].pweight = zero_fraction*unique_path[i].pweight*(unique_depth-i)
|
||||
/ static_cast<bst_float>(unique_depth+1);
|
||||
unique_path[unique_depth].pweight = (unique_depth == 0 ? 1.0f : 0.0f);
|
||||
for (int i = unique_depth - 1; i >= 0; i--) {
|
||||
unique_path[i+1].pweight += one_fraction * unique_path[i].pweight * (i + 1)
|
||||
/ static_cast<bst_float>(unique_depth + 1);
|
||||
unique_path[i].pweight = zero_fraction * unique_path[i].pweight * (unique_depth - i)
|
||||
/ static_cast<bst_float>(unique_depth + 1);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -666,16 +666,16 @@ inline void UnwindPath(PathElement *unique_path, unsigned unique_depth, unsigned
|
||||
const bst_float zero_fraction = unique_path[path_index].zero_fraction;
|
||||
bst_float next_one_portion = unique_path[unique_depth].pweight;
|
||||
|
||||
for (int i = unique_depth-1; i >= 0; --i) {
|
||||
for (int i = unique_depth - 1; i >= 0; --i) {
|
||||
if (one_fraction != 0) {
|
||||
const bst_float tmp = unique_path[i].pweight;
|
||||
unique_path[i].pweight = next_one_portion*(unique_depth+1)
|
||||
/ static_cast<bst_float>((i+1)*one_fraction);
|
||||
next_one_portion = tmp - unique_path[i].pweight*zero_fraction*(unique_depth-i)
|
||||
/ static_cast<bst_float>(unique_depth+1);
|
||||
unique_path[i].pweight = next_one_portion * (unique_depth + 1)
|
||||
/ static_cast<bst_float>((i + 1) * one_fraction);
|
||||
next_one_portion = tmp - unique_path[i].pweight * zero_fraction * (unique_depth - i)
|
||||
/ static_cast<bst_float>(unique_depth + 1);
|
||||
} else {
|
||||
unique_path[i].pweight = (unique_path[i].pweight*(unique_depth+1))
|
||||
/ static_cast<bst_float>(zero_fraction*(unique_depth-i));
|
||||
unique_path[i].pweight = (unique_path[i].pweight * (unique_depth + 1))
|
||||
/ static_cast<bst_float>(zero_fraction * (unique_depth - i));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -694,16 +694,16 @@ inline bst_float UnwoundPathSum(const PathElement *unique_path, unsigned unique_
|
||||
const bst_float zero_fraction = unique_path[path_index].zero_fraction;
|
||||
bst_float next_one_portion = unique_path[unique_depth].pweight;
|
||||
bst_float total = 0;
|
||||
for (int i = unique_depth-1; i >= 0; --i) {
|
||||
for (int i = unique_depth - 1; i >= 0; --i) {
|
||||
if (one_fraction != 0) {
|
||||
const bst_float tmp = next_one_portion*(unique_depth+1)
|
||||
/ static_cast<bst_float>((i+1)*one_fraction);
|
||||
const bst_float tmp = next_one_portion * (unique_depth + 1)
|
||||
/ static_cast<bst_float>((i + 1) * one_fraction);
|
||||
total += tmp;
|
||||
next_one_portion = unique_path[i].pweight - tmp*zero_fraction*((unique_depth-i)
|
||||
next_one_portion = unique_path[i].pweight - tmp * zero_fraction * ((unique_depth - i)
|
||||
/ static_cast<bst_float>(unique_depth+1));
|
||||
} else {
|
||||
total += (unique_path[i].pweight/zero_fraction)/((unique_depth-i)
|
||||
/ static_cast<bst_float>(unique_depth+1));
|
||||
total += (unique_path[i].pweight / zero_fraction) / ((unique_depth - i)
|
||||
/ static_cast<bst_float>(unique_depth + 1));
|
||||
}
|
||||
}
|
||||
return total;
|
||||
@@ -718,7 +718,8 @@ inline void RegTree::TreeShap(const RegTree::FVec& feat, bst_float *phi,
|
||||
|
||||
// extend the unique path
|
||||
PathElement *unique_path = parent_unique_path + unique_depth;
|
||||
if (unique_depth > 0) std::copy(parent_unique_path, parent_unique_path+unique_depth, unique_path);
|
||||
if (unique_depth > 0) std::copy(parent_unique_path,
|
||||
parent_unique_path + unique_depth, unique_path);
|
||||
ExtendPath(unique_path, unique_depth, parent_zero_fraction,
|
||||
parent_one_fraction, parent_feature_index);
|
||||
const unsigned split_index = node.split_index();
|
||||
@@ -728,7 +729,7 @@ inline void RegTree::TreeShap(const RegTree::FVec& feat, bst_float *phi,
|
||||
for (unsigned i = 1; i <= unique_depth; ++i) {
|
||||
const bst_float w = UnwoundPathSum(unique_path, unique_depth, i);
|
||||
const PathElement &el = unique_path[i];
|
||||
phi[el.feature_index] += w*(el.one_fraction-el.zero_fraction)*node.leaf_value();
|
||||
phi[el.feature_index] += w * (el.one_fraction - el.zero_fraction) * node.leaf_value();
|
||||
}
|
||||
|
||||
// internal node
|
||||
@@ -742,10 +743,11 @@ inline void RegTree::TreeShap(const RegTree::FVec& feat, bst_float *phi,
|
||||
} else {
|
||||
hot_index = node.cright();
|
||||
}
|
||||
const unsigned cold_index = (hot_index == node.cleft() ? node.cright() : node.cleft());
|
||||
const unsigned cold_index = (static_cast<int>(hot_index) == node.cleft() ?
|
||||
node.cright() : node.cleft());
|
||||
const bst_float w = this->stat(node_index).sum_hess;
|
||||
const bst_float hot_zero_fraction = this->stat(hot_index).sum_hess/w;
|
||||
const bst_float cold_zero_fraction = this->stat(cold_index).sum_hess/w;
|
||||
const bst_float hot_zero_fraction = this->stat(hot_index).sum_hess / w;
|
||||
const bst_float cold_zero_fraction = this->stat(cold_index).sum_hess / w;
|
||||
bst_float incoming_zero_fraction = 1;
|
||||
bst_float incoming_one_fraction = 1;
|
||||
|
||||
@@ -753,19 +755,19 @@ inline void RegTree::TreeShap(const RegTree::FVec& feat, bst_float *phi,
|
||||
// if so we undo that split so we can redo it for this node
|
||||
unsigned path_index = 0;
|
||||
for (; path_index <= unique_depth; ++path_index) {
|
||||
if (unique_path[path_index].feature_index == split_index) break;
|
||||
if (static_cast<unsigned>(unique_path[path_index].feature_index) == split_index) break;
|
||||
}
|
||||
if (path_index != unique_depth+1) {
|
||||
if (path_index != unique_depth + 1) {
|
||||
incoming_zero_fraction = unique_path[path_index].zero_fraction;
|
||||
incoming_one_fraction = unique_path[path_index].one_fraction;
|
||||
UnwindPath(unique_path, unique_depth, path_index);
|
||||
unique_depth -= 1;
|
||||
}
|
||||
|
||||
TreeShap(feat, phi, hot_index, unique_depth+1, unique_path,
|
||||
TreeShap(feat, phi, hot_index, unique_depth + 1, unique_path,
|
||||
hot_zero_fraction*incoming_zero_fraction, incoming_one_fraction, split_index);
|
||||
|
||||
TreeShap(feat, phi, cold_index, unique_depth+1, unique_path,
|
||||
TreeShap(feat, phi, cold_index, unique_depth + 1, unique_path,
|
||||
cold_zero_fraction*incoming_zero_fraction, 0, split_index);
|
||||
}
|
||||
}
|
||||
@@ -773,21 +775,21 @@ inline void RegTree::TreeShap(const RegTree::FVec& feat, bst_float *phi,
|
||||
inline void RegTree::CalculateContributions(const RegTree::FVec& feat, unsigned root_id,
|
||||
bst_float *out_contribs) const {
|
||||
// find the expected value of the tree's predictions
|
||||
bst_float base_value = 0.0;
|
||||
bst_float total_cover = 0;
|
||||
bst_float base_value = 0.0f;
|
||||
bst_float total_cover = 0.0f;
|
||||
for (int i = 0; i < (*this).param.num_nodes; ++i) {
|
||||
const auto node = (*this)[i];
|
||||
if (node.is_leaf()) {
|
||||
const auto cover = this->stat(i).sum_hess;
|
||||
base_value += cover*node.leaf_value();
|
||||
base_value += cover * node.leaf_value();
|
||||
total_cover += cover;
|
||||
}
|
||||
}
|
||||
out_contribs[feat.size()] += base_value / total_cover;
|
||||
|
||||
// Preallocate space for the unique path data
|
||||
const int maxd = this->MaxDepth(root_id)+1;
|
||||
PathElement *unique_path_data = new PathElement[(maxd*(maxd+1))/2];
|
||||
const int maxd = this->MaxDepth(root_id) + 1;
|
||||
PathElement *unique_path_data = new PathElement[(maxd * (maxd + 1)) / 2];
|
||||
|
||||
TreeShap(feat, out_contribs, root_id, 0, unique_path_data, 1, 1, -1);
|
||||
delete[] unique_path_data;
|
||||
|
||||
Reference in New Issue
Block a user