Implement feature score for linear model. (#7048)

* Add feature score support for linear model.
* Port R interface to the new implementation.
* Add linear model support in Python.

Co-authored-by: Philip Hyunsu Cho <chohyu01@cs.washington.edu>
This commit is contained in:
Jiaming Yuan
2021-06-25 14:34:02 +08:00
committed by GitHub
parent b2d300e727
commit 663136aa08
18 changed files with 367 additions and 232 deletions

View File

@@ -12,6 +12,7 @@
#include <string>
#include <sstream>
#include <algorithm>
#include <numeric>
#include "xgboost/gbm.h"
#include "xgboost/json.h"
@@ -19,6 +20,7 @@
#include "xgboost/linear_updater.h"
#include "xgboost/logging.h"
#include "xgboost/learner.h"
#include "xgboost/linalg.h"
#include "gblinear_model.h"
#include "../common/timer.h"
@@ -219,6 +221,26 @@ class GBLinear : public GradientBooster {
return model_.DumpModel(fmap, with_stats, format);
}
// Compute per-feature importance scores for the linear model.
// Only the "weight" importance type is supported: the score of feature i for
// output group g is simply the learned coefficient model_[i][g].
//
// out_features receives the feature indices [0, num_feature) and out_scores
// receives num_feature * num_output_group coefficients (bias excluded).
void FeatureScore(std::string const &importance_type,
std::vector<bst_feature_t> *out_features,
std::vector<float> *out_scores) const override {
// An empty weight vector means the model was never trained/loaded.
CHECK(!model_.weight.empty()) << "Model is not initialized";
// Unlike tree boosters, a linear model has no gain/cover statistics.
CHECK_EQ(importance_type, "weight")
<< "gblinear only has `weight` defined for feature importance.";
// Emit every feature index 0..num_feature-1, in order.
out_features->resize(this->learner_model_param_->num_feature, 0);
std::iota(out_features->begin(), out_features->end(), 0);
// Don't include the bias term in the feature importance scores
// The bias is the last weight
// weight holds num_feature * num_output_group coefficients plus one bias
// per output group; dropping num_output_group entries removes the biases.
out_scores->resize(model_.weight.size() - learner_model_param_->num_output_group, 0);
auto n_groups = learner_model_param_->num_output_group;
// 2-D view over out_scores shaped (num_feature, n_groups).
// NOTE(review): assumes MatrixView indexes the flat buffer row-major
// (feature-major), matching how callers unpack the scores — confirm
// against the MatrixView definition in xgboost/linalg.h.
MatrixView<float> scores{out_scores, {learner_model_param_->num_feature, n_groups}};
// Copy each coefficient; model_[i][g] is presumably the weight of
// feature i for output group g (see gblinear_model.h accessor).
for (size_t i = 0; i < learner_model_param_->num_feature; ++i) {
for (bst_group_t g = 0; g < n_groups; ++g) {
scores(i, g) = model_[i][g];
}
}
}
bool UseGPU() const override {
if (param_.updater == "gpu_coord_descent") {
return true;

View File

@@ -325,16 +325,19 @@ class GBTree : public GradientBooster {
add_score([&](auto const &p_tree, bst_node_t, bst_feature_t split) {
gain_map[split] = split_counts[split];
});
}
if (importance_type == "gain" || importance_type == "total_gain") {
} else if (importance_type == "gain" || importance_type == "total_gain") {
add_score([&](auto const &p_tree, bst_node_t nidx, bst_feature_t split) {
gain_map[split] += p_tree->Stat(nidx).loss_chg;
});
}
if (importance_type == "cover" || importance_type == "total_cover") {
} else if (importance_type == "cover" || importance_type == "total_cover") {
add_score([&](auto const &p_tree, bst_node_t nidx, bst_feature_t split) {
gain_map[split] += p_tree->Stat(nidx).sum_hess;
});
} else {
LOG(FATAL)
<< "Unknown feature importance type, expected one of: "
<< R"({"weight", "total_gain", "total_cover", "gain", "cover"}, got: )"
<< importance_type;
}
if (importance_type == "gain" || importance_type == "cover") {
for (size_t i = 0; i < gain_map.size(); ++i) {