Add SHAP interaction effects, fix minor bug, and add cox loss (#3043)

* Add interaction effects and cox loss

* Minimize whitespace changes

* Cox loss now no longer needs a pre-sorted dataset.

* Address code review comments

* Remove mem check, rename to pred_interactions, include bias

* Make lint happy

* More lint fixes

* Fix cox loss indexing

* Fix main effects and tests

* Fix lint

* Use half interaction values on the off-diagonals

* Fix lint again
This commit is contained in:
Scott Lundberg
2018-02-07 18:38:01 -08:00
committed by Vadim Khotilovich
parent 077abb35cd
commit d878c36c84
19 changed files with 638 additions and 125 deletions

View File

@@ -759,7 +759,8 @@ XGB_DLL int XGBoosterPredict(BoosterHandle handle,
&preds, ntree_limit,
(option_mask & 2) != 0,
(option_mask & 4) != 0,
(option_mask & 8) != 0);
(option_mask & 8) != 0,
(option_mask & 16) != 0);
*out_result = dmlc::BeginPtr(preds);
*len = static_cast<xgboost::bst_ulong>(preds.size());
API_END();

View File

@@ -224,7 +224,8 @@ class GBLinear : public GradientBooster {
void PredictContribution(DMatrix* p_fmat,
std::vector<bst_float>* out_contribs,
unsigned ntree_limit, bool approximate) override {
unsigned ntree_limit, bool approximate, int condition = 0,
unsigned condition_feature = 0) override {
if (model.weight.size() == 0) {
model.InitModel();
}
@@ -265,6 +266,17 @@ class GBLinear : public GradientBooster {
}
}
/*!
 * \brief Produce SHAP interaction values for a linear booster.
 *
 * A linear model has no feature interactions, so the output is simply
 * sized and zero-filled.
 *
 * \param p_fmat        data matrix; only its row count is read
 * \param out_contribs  output vector; overwritten with
 *                      num_row * num_feature^2 * num_output_group zeros
 * \param ntree_limit   not used by this implementation
 * \param approximate   not used by this implementation
 */
void PredictInteractionContributions(DMatrix* p_fmat,
                                     std::vector<bst_float>* out_contribs,
                                     unsigned ntree_limit, bool approximate) override {
  // one num_feature x num_feature matrix per row and per output group
  const size_t matrix_size = model.param.num_feature*model.param.num_feature;
  const size_t total = p_fmat->info().num_row * matrix_size * model.param.num_output_group;
  // linear models have no interaction effects: every entry is zero
  out_contribs->assign(total, static_cast<bst_float>(0));
}
std::vector<std::string> DumpModel(const FeatureMap& fmap,
bool with_stats,
std::string format) const override {

View File

@@ -220,10 +220,18 @@ class GBTree : public GradientBooster {
void PredictContribution(DMatrix* p_fmat,
std::vector<bst_float>* out_contribs,
unsigned ntree_limit, bool approximate) override {
unsigned ntree_limit, bool approximate, int condition,
unsigned condition_feature) override {
predictor->PredictContribution(p_fmat, out_contribs, model_, ntree_limit, approximate);
}
/*!
 * \brief Compute SHAP interaction values by delegating to the predictor.
 *
 * \param p_fmat        data matrix to explain
 * \param out_contribs  output vector, filled by the predictor
 * \param ntree_limit   forwarded unchanged to the predictor
 * \param approximate   forwarded unchanged to the predictor
 */
void PredictInteractionContributions(DMatrix* p_fmat,
                                     std::vector<bst_float>* out_contribs,
                                     unsigned ntree_limit,
                                     bool approximate) override {
  // the predictor additionally needs this booster's tree model
  predictor->PredictInteractionContributions(
      p_fmat, out_contribs, model_, ntree_limit, approximate);
}
std::vector<std::string> DumpModel(const FeatureMap& fmap,
bool with_stats,
std::string format) const override {

View File

@@ -443,9 +443,12 @@ class LearnerImpl : public Learner {
void Predict(DMatrix* data, bool output_margin,
std::vector<bst_float>* out_preds, unsigned ntree_limit,
bool pred_leaf, bool pred_contribs, bool approx_contribs) const override {
bool pred_leaf, bool pred_contribs, bool approx_contribs,
bool pred_interactions) const override {
if (pred_contribs) {
gbm_->PredictContribution(data, out_preds, ntree_limit, approx_contribs);
} else if (pred_interactions) {
gbm_->PredictInteractionContributions(data, out_preds, ntree_limit, approx_contribs);
} else if (pred_leaf) {
gbm_->PredictLeaf(data, out_preds, ntree_limit);
} else {

View File

@@ -304,6 +304,52 @@ struct EvalMAP : public EvalRankList {
}
};
/*!
 * \brief Cox: negative log partial likelihood of the Cox proportional hazards model.
 *
 * Rows are visited in the order given by MetaInfo::LabelAbsSort(). A positive
 * label marks an observed event; other labels only contribute to the risk set.
 * Predictions are summed as-is for the denominator (no exponentiation here) --
 * presumably they were already transformed by the objective; confirm with caller.
 */
struct EvalCox : public Metric {
 public:
  EvalCox() {}

  /*!
   * \brief Evaluate the metric.
   * \param preds        per-row predictions, indexed like info.labels
   * \param info         meta information holding the labels
   * \param distributed  must be false; distributed evaluation is rejected
   * \return accumulated negative log partial likelihood divided by the number
   *         of rows with a positive label (note: divides by zero when there
   *         are none)
   */
  bst_float Eval(const std::vector<bst_float> &preds,
                 const MetaInfo &info,
                 bool distributed) const override {
    CHECK(!distributed) << "Cox metric does not support distributed evaluation";

    const bst_omp_uint ndata = static_cast<bst_omp_uint>(info.labels.size());
    const std::vector<size_t> &label_order = info.LabelAbsSort();

    // denominator of the partial likelihood: starts as the sum over all rows;
    // double is used because precision may matter with large datasets
    double risk_total = 0;
    for (omp_ulong k = 0; k < ndata; ++k) {
      risk_total += preds[k];
    }

    double neg_loglik = 0;
    double pending = 0;  // predictions of rows visited since the last denominator update
    bst_omp_uint n_events = 0;
    for (bst_omp_uint pos = 0; pos < ndata; ++pos) {
      const size_t row = label_order[pos];
      const auto y = info.labels[row];
      if (y > 0) {
        neg_loglik -= std::log(preds[row]) - std::log(risk_total);
        ++n_events;
      }

      // rows with tied |label| share one denominator: only shrink the risk
      // set once the next row has a strictly larger |label| (or at the end)
      pending += preds[row];
      const bool is_last = (pos == ndata - 1);
      if (is_last || std::abs(y) < std::abs(info.labels[label_order[pos + 1]])) {
        risk_total -= pending;
        pending = 0;
      }
    }

    return neg_loglik/n_events;  // normalize by the number of events
  }

  const char* Name() const override {
    return "cox-nloglik";
  }
};
XGBOOST_REGISTER_METRIC(AMS, "ams")
.describe("AMS metric for higgs.")
.set_body([](const char* param) { return new EvalAMS(param); });
@@ -323,5 +369,9 @@ XGBOOST_REGISTER_METRIC(NDCG, "ndcg")
XGBOOST_REGISTER_METRIC(MAP, "map")
.describe("map@k for rank.")
.set_body([](const char* param) { return new EvalMAP(param); });
// Register EvalCox under the name "cox-nloglik"; the factory ignores its parameter string.
XGBOOST_REGISTER_METRIC(Cox, "cox-nloglik")
.describe("Negative log partial likelihood of Cox proportional hazards model.")
.set_body([](const char* param) { return new EvalCox(); });
} // namespace metric
} // namespace xgboost

View File

@@ -197,6 +197,90 @@ XGBOOST_REGISTER_OBJECTIVE(PoissonRegression, "count:poisson")
.describe("Possion regression for count data.")
.set_body([]() { return new PoissonRegression(); });
// cox regression for survival data (negative values mean they are censored)
class CoxRegression : public ObjFunction {
 public:
  /*! \brief This objective takes no configuration; all arguments are ignored. */
  void Configure(const std::vector<std::pair<std::string, std::string> >& args) override {}

  /*!
   * \brief Compute per-row gradient and hessian of the Cox partial likelihood loss.
   *
   * Rows are processed in the order returned by MetaInfo::LabelAbsSort(), so the
   * input does not need to be pre-sorted. A positive label marks an observed
   * event; ties in |label| are handled with Breslow's method.
   *
   * \param preds      raw margin predictions, one per label
   * \param info       meta information providing labels and per-row weights
   * \param iter       boosting iteration (not used)
   * \param out_gpair  output; resized to preds.size() and written at each row's
   *                   original index
   */
  void GetGradient(const std::vector<bst_float> &preds,
                   const MetaInfo &info,
                   int iter,
                   std::vector<bst_gpair> *out_gpair) override {
    CHECK_NE(info.labels.size(), 0U) << "label set cannot be empty";
    CHECK_EQ(preds.size(), info.labels.size()) << "labels are not correctly provided";
    out_gpair->resize(preds.size());
    const std::vector<size_t> &label_order = info.LabelAbsSort();

    const omp_ulong ndata = static_cast<omp_ulong>(preds.size());  // NOLINT(*)

    // pre-compute the sum of exp(pred) over all rows (the initial risk set);
    // we use double because we might need the precision with large datasets
    double exp_p_sum = 0;
    for (omp_ulong i = 0; i < ndata; ++i) {
      exp_p_sum += std::exp(preds[label_order[i]]);
    }

    // start calculating grad and hess
    double r_k = 0;  // running sum of 1/denominator over events seen so far
    double s_k = 0;  // running sum of 1/denominator^2 over events seen so far
    double last_exp_p = 0.0;
    double last_abs_y = 0.0;
    double accumulated_sum = 0;  // exp(pred) of visited rows not yet removed from exp_p_sum
    for (omp_ulong i = 0; i < ndata; ++i) {  // NOLINT(*)
      const size_t ind = label_order[i];
      const double p = preds[ind];
      const double exp_p = std::exp(p);
      const double w = info.GetWeight(ind);
      const double y = info.labels[ind];
      const double abs_y = std::abs(y);

      // only update the denominator after we move forward in time (labels are sorted)
      // this is Breslow's method for ties
      accumulated_sum += last_exp_p;
      if (last_abs_y < abs_y) {
        exp_p_sum -= accumulated_sum;
        accumulated_sum = 0;
      } else {
        // not strictly increasing: the only valid case is a tie
        CHECK(last_abs_y <= abs_y) << "CoxRegression: labels must be in sorted order, " <<
                                      "MetaInfo::LabelAbsSort failed!";
      }

      if (y > 0) {
        r_k += 1.0/exp_p_sum;
        s_k += 1.0/(exp_p_sum*exp_p_sum);
      }

      const double grad = exp_p*r_k - static_cast<bst_float>(y > 0);
      const double hess = exp_p*r_k - exp_p*exp_p * s_k;
      out_gpair->at(ind) = bst_gpair(grad * w, hess * w);

      last_abs_y = abs_y;
      last_exp_p = exp_p;
    }
  }

  /*! \brief Replace every margin prediction with its exponential, in place. */
  void PredTransform(std::vector<bst_float> *io_preds) override {
    std::vector<bst_float> &preds = *io_preds;
    const long ndata = static_cast<long>(preds.size());  // NOLINT(*)
    #pragma omp parallel for schedule(static)
    for (long j = 0; j < ndata; ++j) {  // NOLINT(*)
      preds[j] = std::exp(preds[j]);
    }
  }

  /*! \brief Evaluation uses the same transform as prediction. */
  void EvalTransform(std::vector<bst_float> *io_preds) override {
    PredTransform(io_preds);
  }

  /*! \brief Inverse of PredTransform: map a base score to margin space via log. */
  bst_float ProbToMargin(bst_float base_score) const override {
    return std::log(base_score);
  }

  /*! \brief Name of the metric used when none is specified. */
  const char* DefaultEvalMetric(void) const override {
    return "cox-nloglik";
  }
};
// register the Cox objective under the name "survival:cox";
// the factory returns a new CoxRegression instance
XGBOOST_REGISTER_OBJECTIVE(CoxRegression, "survival:cox")
.describe("Cox regression for censored survival data (negative labels are considered censored).")
.set_body([]() { return new CoxRegression(); });
// gamma regression
class GammaRegression : public ObjFunction {
public:

View File

@@ -215,7 +215,9 @@ class CPUPredictor : public Predictor {
void PredictContribution(DMatrix* p_fmat, std::vector<bst_float>* out_contribs,
const gbm::GBTreeModel& model, unsigned ntree_limit,
bool approximate) override {
bool approximate,
int condition,
unsigned condition_feature) override {
const int nthread = omp_get_max_threads();
InitThreadTemp(nthread, model.param.num_feature);
const MetaInfo& info = p_fmat->info();
@@ -232,12 +234,10 @@ class CPUPredictor : public Predictor {
// make sure contributions is zeroed, we could be reusing a previously
// allocated one
std::fill(contribs.begin(), contribs.end(), 0);
if (approximate) {
// initialize tree node mean values
#pragma omp parallel for schedule(static)
for (bst_omp_uint i = 0; i < ntree_limit; ++i) {
model.trees[i]->FillNodeMeanValues();
}
// initialize tree node mean values
#pragma omp parallel for schedule(static)
for (bst_omp_uint i = 0; i < ntree_limit; ++i) {
model.trees[i]->FillNodeMeanValues();
}
// start collecting the contributions
dmlc::DataIter<RowBatch>* iter = p_fmat->RowIterator();
@@ -263,7 +263,8 @@ class CPUPredictor : public Predictor {
continue;
}
if (!approximate) {
model.trees[j]->CalculateContributions(feats, root_id, p_contribs);
model.trees[j]->CalculateContributions(feats, root_id, p_contribs,
condition, condition_feature);
} else {
model.trees[j]->CalculateContributionsApprox(feats, root_id, p_contribs);
}
@@ -279,6 +280,50 @@ class CPUPredictor : public Predictor {
}
}
}
/*!
 * \brief Compute SHAP interaction values for every row of the data matrix.
 *
 * For each row and output group a (num_feature + 1) x (num_feature + 1)
 * matrix is written to out_contribs. Off-diagonal entry (i, k) is half the
 * difference between feature k's contribution with feature i conditioned
 * "on" and "off". Diagonal entry (i, i) is feature i's unconditioned
 * contribution minus the off-diagonal entries of row i, so each matrix row
 * sums to that feature's ordinary contribution.
 *
 * \param p_fmat        data matrix to explain
 * \param out_contribs  output; resized to
 *                      num_row * num_output_group * (num_feature + 1)^2
 * \param model         tree model to explain
 * \param ntree_limit   forwarded to PredictContribution
 * \param approximate   forwarded to PredictContribution
 */
void PredictInteractionContributions(DMatrix* p_fmat, std::vector<bst_float>* out_contribs,
                                     const gbm::GBTreeModel& model, unsigned ntree_limit,
                                     bool approximate) override {
  const MetaInfo& info = p_fmat->info();
  const int ngroup = model.param.num_output_group;
  const size_t ncolumns = model.param.num_feature;
  const size_t width = ncolumns + 1;  // one extra column beyond the features

  // Strides and offsets are size_t on purpose: with many rows and features
  // the flat output index exceeds 32 bits, and a narrower type would wrap
  // around and write the wrong elements.
  const size_t row_chunk = ngroup * width * width;   // output elements per row
  const size_t mrow_chunk = width * width;           // output elements per (row, group)
  const size_t crow_chunk = ngroup * width;          // contribution elements per row

  // allocate space for (number of features + 1)^2 values per row and group,
  // plus temporary buffers for the off/on/unconditioned contributions
  std::vector<bst_float>& contribs = *out_contribs;
  contribs.resize(info.num_row * ngroup * (ncolumns + 1) * (ncolumns + 1));
  std::vector<bst_float> contribs_off(info.num_row * ngroup * (ncolumns + 1));
  std::vector<bst_float> contribs_on(info.num_row * ngroup * (ncolumns + 1));
  std::vector<bst_float> contribs_diag(info.num_row * ngroup * (ncolumns + 1));

  // Compute the difference in effects when conditioning on each of the features on and off
  // see: Axiomatic characterizations of probabilistic and
  //      cardinal-probabilistic interaction indices
  PredictContribution(p_fmat, &contribs_diag, model, ntree_limit, approximate, 0, 0);
  for (size_t i = 0; i < width; ++i) {
    PredictContribution(p_fmat, &contribs_off, model, ntree_limit, approximate, -1, i);
    PredictContribution(p_fmat, &contribs_on, model, ntree_limit, approximate, 1, i);

    for (size_t j = 0; j < info.num_row; ++j) {
      for (int l = 0; l < ngroup; ++l) {
        const size_t o_offset = j * row_chunk + l * mrow_chunk + i * width;
        const size_t c_offset = j * crow_chunk + l * width;
        contribs[o_offset + i] = 0;
        for (size_t k = 0; k < width; ++k) {
          // fill in the diagonal with additive effects, and off-diagonal with the interactions
          if (k == i) {
            contribs[o_offset + i] += contribs_diag[c_offset + k];
          } else {
            contribs[o_offset + k] = (contribs_on[c_offset + k] - contribs_off[c_offset + k])/2.0;
            contribs[o_offset + i] -= contribs[o_offset + k];
          }
        }
      }
    }
  }
}
std::vector<RegTree::FVec> thread_temp;
};

View File

@@ -454,10 +454,22 @@ class GPUPredictor : public xgboost::Predictor {
void PredictContribution(DMatrix* p_fmat,
std::vector<bst_float>* out_contribs,
const gbm::GBTreeModel& model, unsigned ntree_limit,
bool approximate) override {
cpu_predictor->PredictContribution(p_fmat, out_contribs, model, ntree_limit,
approximate);
const gbm::GBTreeModel& model,
unsigned ntree_limit,
bool approximate,
int condition,
unsigned condition_feature) override {
cpu_predictor->PredictContribution(p_fmat, out_contribs, model,
ntree_limit, approximate, condition, condition_feature);
}
/*!
 * \brief Compute SHAP interaction values by delegating to the CPU predictor.
 *
 * All arguments are passed through unchanged to
 * cpu_predictor->PredictInteractionContributions.
 */
void PredictInteractionContributions(DMatrix* p_fmat,
                                     std::vector<bst_float>* out_contribs,
                                     const gbm::GBTreeModel& model,
                                     unsigned ntree_limit,
                                     bool approximate) override {
  cpu_predictor->PredictInteractionContributions(
      p_fmat, out_contribs, model, ntree_limit, approximate);
}
void Init(const std::vector<std::pair<std::string, std::string>>& cfg,