[R] maintenance Nov 2017; SHAP plots (#2888)

* [R] fix predict contributions for data with no colnames

* [R] add a render parameter for xgb.plot.multi.trees; fixes #2628

* [R] update Rd files

* [R] remove unnecessary dep-package from R cmake install

* silence type warnings; readability

* [R] silence complaint about an incomplete final line at the end of a file

* [R] initial version of xgb.plot.shap()

* [R] more work on xgb.plot.shap

* [R] enforce black font in xgb.plot.tree; fixes #2640

* [R] if feature names are available, check in predict that the model's and the new data's feature names are the same; fixes #2857

* [R] cran check and lint fixes

* remove tabs

* [R] add references; a test for plot.shap
This commit is contained in:
Vadim Khotilovich
2017-12-05 11:45:34 -06:00
committed by Tong He
parent 1b77903eeb
commit e8a6597957
19 changed files with 554 additions and 118 deletions

View File

@@ -516,7 +516,7 @@ class RegTree: public TreeModel<bst_float, RTreeNodeStat> {
* \param out_contribs output vector to hold the contributions
*/
inline void CalculateContributionsApprox(const RegTree::FVec& feat, unsigned root_id,
bst_float *out_contribs) const;
bst_float *out_contribs) const;
/*!
* \brief get next position of the tree given current pid
* \param pid Current node id.
@@ -619,7 +619,7 @@ inline bst_float RegTree::FillNodeMeanValue(int nid) {
}
inline void RegTree::CalculateContributionsApprox(const RegTree::FVec& feat, unsigned root_id,
bst_float *out_contribs) const {
bst_float *out_contribs) const {
CHECK_GT(this->node_mean_values.size(), 0U);
// this follows the idea of http://blog.datadive.net/interpreting-random-forests/
bst_float node_value;
@@ -647,16 +647,16 @@ inline void RegTree::CalculateContributionsApprox(const RegTree::FVec& feat, uns
// extend our decision path with a fraction of one and zero extensions
inline void ExtendPath(PathElement *unique_path, unsigned unique_depth,
bst_float zero_fraction, bst_float one_fraction, int feature_index) {
bst_float zero_fraction, bst_float one_fraction, int feature_index) {
unique_path[unique_depth].feature_index = feature_index;
unique_path[unique_depth].zero_fraction = zero_fraction;
unique_path[unique_depth].one_fraction = one_fraction;
unique_path[unique_depth].pweight = static_cast<bst_float>(unique_depth == 0 ? 1 : 0);
for (int i = unique_depth-1; i >= 0; i--) {
unique_path[i+1].pweight += one_fraction*unique_path[i].pweight*(i+1)
/ static_cast<bst_float>(unique_depth+1);
unique_path[i].pweight = zero_fraction*unique_path[i].pweight*(unique_depth-i)
/ static_cast<bst_float>(unique_depth+1);
unique_path[unique_depth].pweight = (unique_depth == 0 ? 1.0f : 0.0f);
for (int i = unique_depth - 1; i >= 0; i--) {
unique_path[i+1].pweight += one_fraction * unique_path[i].pweight * (i + 1)
/ static_cast<bst_float>(unique_depth + 1);
unique_path[i].pweight = zero_fraction * unique_path[i].pweight * (unique_depth - i)
/ static_cast<bst_float>(unique_depth + 1);
}
}
@@ -666,16 +666,16 @@ inline void UnwindPath(PathElement *unique_path, unsigned unique_depth, unsigned
const bst_float zero_fraction = unique_path[path_index].zero_fraction;
bst_float next_one_portion = unique_path[unique_depth].pweight;
for (int i = unique_depth-1; i >= 0; --i) {
for (int i = unique_depth - 1; i >= 0; --i) {
if (one_fraction != 0) {
const bst_float tmp = unique_path[i].pweight;
unique_path[i].pweight = next_one_portion*(unique_depth+1)
/ static_cast<bst_float>((i+1)*one_fraction);
next_one_portion = tmp - unique_path[i].pweight*zero_fraction*(unique_depth-i)
/ static_cast<bst_float>(unique_depth+1);
unique_path[i].pweight = next_one_portion * (unique_depth + 1)
/ static_cast<bst_float>((i + 1) * one_fraction);
next_one_portion = tmp - unique_path[i].pweight * zero_fraction * (unique_depth - i)
/ static_cast<bst_float>(unique_depth + 1);
} else {
unique_path[i].pweight = (unique_path[i].pweight*(unique_depth+1))
/ static_cast<bst_float>(zero_fraction*(unique_depth-i));
unique_path[i].pweight = (unique_path[i].pweight * (unique_depth + 1))
/ static_cast<bst_float>(zero_fraction * (unique_depth - i));
}
}
@@ -694,16 +694,16 @@ inline bst_float UnwoundPathSum(const PathElement *unique_path, unsigned unique_
const bst_float zero_fraction = unique_path[path_index].zero_fraction;
bst_float next_one_portion = unique_path[unique_depth].pweight;
bst_float total = 0;
for (int i = unique_depth-1; i >= 0; --i) {
for (int i = unique_depth - 1; i >= 0; --i) {
if (one_fraction != 0) {
const bst_float tmp = next_one_portion*(unique_depth+1)
/ static_cast<bst_float>((i+1)*one_fraction);
const bst_float tmp = next_one_portion * (unique_depth + 1)
/ static_cast<bst_float>((i + 1) * one_fraction);
total += tmp;
next_one_portion = unique_path[i].pweight - tmp*zero_fraction*((unique_depth-i)
next_one_portion = unique_path[i].pweight - tmp * zero_fraction * ((unique_depth - i)
/ static_cast<bst_float>(unique_depth+1));
} else {
total += (unique_path[i].pweight/zero_fraction)/((unique_depth-i)
/ static_cast<bst_float>(unique_depth+1));
total += (unique_path[i].pweight / zero_fraction) / ((unique_depth - i)
/ static_cast<bst_float>(unique_depth + 1));
}
}
return total;
@@ -718,7 +718,8 @@ inline void RegTree::TreeShap(const RegTree::FVec& feat, bst_float *phi,
// extend the unique path
PathElement *unique_path = parent_unique_path + unique_depth;
if (unique_depth > 0) std::copy(parent_unique_path, parent_unique_path+unique_depth, unique_path);
if (unique_depth > 0) std::copy(parent_unique_path,
parent_unique_path + unique_depth, unique_path);
ExtendPath(unique_path, unique_depth, parent_zero_fraction,
parent_one_fraction, parent_feature_index);
const unsigned split_index = node.split_index();
@@ -728,7 +729,7 @@ inline void RegTree::TreeShap(const RegTree::FVec& feat, bst_float *phi,
for (unsigned i = 1; i <= unique_depth; ++i) {
const bst_float w = UnwoundPathSum(unique_path, unique_depth, i);
const PathElement &el = unique_path[i];
phi[el.feature_index] += w*(el.one_fraction-el.zero_fraction)*node.leaf_value();
phi[el.feature_index] += w * (el.one_fraction - el.zero_fraction) * node.leaf_value();
}
// internal node
@@ -742,10 +743,11 @@ inline void RegTree::TreeShap(const RegTree::FVec& feat, bst_float *phi,
} else {
hot_index = node.cright();
}
const unsigned cold_index = (hot_index == node.cleft() ? node.cright() : node.cleft());
const unsigned cold_index = (static_cast<int>(hot_index) == node.cleft() ?
node.cright() : node.cleft());
const bst_float w = this->stat(node_index).sum_hess;
const bst_float hot_zero_fraction = this->stat(hot_index).sum_hess/w;
const bst_float cold_zero_fraction = this->stat(cold_index).sum_hess/w;
const bst_float hot_zero_fraction = this->stat(hot_index).sum_hess / w;
const bst_float cold_zero_fraction = this->stat(cold_index).sum_hess / w;
bst_float incoming_zero_fraction = 1;
bst_float incoming_one_fraction = 1;
@@ -753,19 +755,19 @@ inline void RegTree::TreeShap(const RegTree::FVec& feat, bst_float *phi,
// if so we undo that split so we can redo it for this node
unsigned path_index = 0;
for (; path_index <= unique_depth; ++path_index) {
if (unique_path[path_index].feature_index == split_index) break;
if (static_cast<unsigned>(unique_path[path_index].feature_index) == split_index) break;
}
if (path_index != unique_depth+1) {
if (path_index != unique_depth + 1) {
incoming_zero_fraction = unique_path[path_index].zero_fraction;
incoming_one_fraction = unique_path[path_index].one_fraction;
UnwindPath(unique_path, unique_depth, path_index);
unique_depth -= 1;
}
TreeShap(feat, phi, hot_index, unique_depth+1, unique_path,
TreeShap(feat, phi, hot_index, unique_depth + 1, unique_path,
hot_zero_fraction*incoming_zero_fraction, incoming_one_fraction, split_index);
TreeShap(feat, phi, cold_index, unique_depth+1, unique_path,
TreeShap(feat, phi, cold_index, unique_depth + 1, unique_path,
cold_zero_fraction*incoming_zero_fraction, 0, split_index);
}
}
@@ -773,21 +775,21 @@ inline void RegTree::TreeShap(const RegTree::FVec& feat, bst_float *phi,
inline void RegTree::CalculateContributions(const RegTree::FVec& feat, unsigned root_id,
bst_float *out_contribs) const {
// find the expected value of the tree's predictions
bst_float base_value = 0.0;
bst_float total_cover = 0;
bst_float base_value = 0.0f;
bst_float total_cover = 0.0f;
for (int i = 0; i < (*this).param.num_nodes; ++i) {
const auto node = (*this)[i];
if (node.is_leaf()) {
const auto cover = this->stat(i).sum_hess;
base_value += cover*node.leaf_value();
base_value += cover * node.leaf_value();
total_cover += cover;
}
}
out_contribs[feat.size()] += base_value / total_cover;
// Preallocate space for the unique path data
const int maxd = this->MaxDepth(root_id)+1;
PathElement *unique_path_data = new PathElement[(maxd*(maxd+1))/2];
const int maxd = this->MaxDepth(root_id) + 1;
PathElement *unique_path_data = new PathElement[(maxd * (maxd + 1)) / 2];
TreeShap(feat, out_contribs, root_id, 0, unique_path_data, 1, 1, -1);
delete[] unique_path_data;