Implement feature score for linear model. (#7048)

* Add feature score support for linear model.
* Port R interface to the new implementation.
* Add linear model support in Python.

Co-authored-by: Philip Hyunsu Cho <chohyu01@cs.washington.edu>
This commit is contained in:
Jiaming Yuan
2021-06-25 14:34:02 +08:00
committed by GitHub
parent b2d300e727
commit 663136aa08
18 changed files with 367 additions and 232 deletions

View File

@@ -12,6 +12,7 @@
#include <string>
#include <sstream>
#include <algorithm>
#include <numeric>
#include "xgboost/gbm.h"
#include "xgboost/json.h"
@@ -19,6 +20,7 @@
#include "xgboost/linear_updater.h"
#include "xgboost/logging.h"
#include "xgboost/learner.h"
#include "xgboost/linalg.h"
#include "gblinear_model.h"
#include "../common/timer.h"
@@ -219,6 +221,26 @@ class GBLinear : public GradientBooster {
return model_.DumpModel(fmap, with_stats, format);
}
// Compute per-feature importance scores for the linear model.
// Only the "weight" importance type is supported: the score of feature i for
// output group g is simply the learned coefficient model_[i][g].
//
// out_features receives the feature indices [0, num_feature) and out_scores
// receives num_feature * num_output_group coefficients (bias excluded).
void FeatureScore(std::string const &importance_type,
std::vector<bst_feature_t> *out_features,
std::vector<float> *out_scores) const override {
// An empty weight vector means the model was never trained/loaded.
CHECK(!model_.weight.empty()) << "Model is not initialized";
// Unlike tree boosters, a linear model has no gain/cover statistics.
CHECK_EQ(importance_type, "weight")
<< "gblinear only has `weight` defined for feature importance.";
// Emit every feature index 0..num_feature-1, in order.
out_features->resize(this->learner_model_param_->num_feature, 0);
std::iota(out_features->begin(), out_features->end(), 0);
// Don't include the bias term in the feature importance scores
// The bias is the last weight
// weight holds num_feature * num_output_group coefficients plus one bias
// per output group; dropping num_output_group entries removes the biases.
out_scores->resize(model_.weight.size() - learner_model_param_->num_output_group, 0);
auto n_groups = learner_model_param_->num_output_group;
// 2-D view over out_scores shaped (num_feature, n_groups).
// NOTE(review): assumes MatrixView indexes the flat buffer row-major
// (feature-major), matching how callers unpack the scores — confirm
// against the MatrixView definition in xgboost/linalg.h.
MatrixView<float> scores{out_scores, {learner_model_param_->num_feature, n_groups}};
// Copy each coefficient; model_[i][g] is presumably the weight of
// feature i for output group g (see gblinear_model.h accessor).
for (size_t i = 0; i < learner_model_param_->num_feature; ++i) {
for (bst_group_t g = 0; g < n_groups; ++g) {
scores(i, g) = model_[i][g];
}
}
}
bool UseGPU() const override {
if (param_.updater == "gpu_coord_descent") {
return true;

View File

@@ -325,16 +325,19 @@ class GBTree : public GradientBooster {
add_score([&](auto const &p_tree, bst_node_t, bst_feature_t split) {
gain_map[split] = split_counts[split];
});
}
if (importance_type == "gain" || importance_type == "total_gain") {
} else if (importance_type == "gain" || importance_type == "total_gain") {
add_score([&](auto const &p_tree, bst_node_t nidx, bst_feature_t split) {
gain_map[split] += p_tree->Stat(nidx).loss_chg;
});
}
if (importance_type == "cover" || importance_type == "total_cover") {
} else if (importance_type == "cover" || importance_type == "total_cover") {
add_score([&](auto const &p_tree, bst_node_t nidx, bst_feature_t split) {
gain_map[split] += p_tree->Stat(nidx).sum_hess;
});
} else {
LOG(FATAL)
<< "Unknown feature importance type, expected one of: "
<< R"({"weight", "total_gain", "total_cover", "gain", "cover"}, got: )"
<< importance_type;
}
if (importance_type == "gain" || importance_type == "cover") {
for (size_t i = 0; i < gain_map.size(); ++i) {