[R] Fix global feature importance and predict with 1 sample. (#7394)
* [R] Fix global feature importance.
* Add implementation for tree index. The parameter is not documented in the C API, since we should work on porting model slicing to R instead of supporting more uses of the tree index.
* Fix the difference between "gain" and "total_gain".
* Debug.
* Fix prediction.
This commit is contained in:
@@ -1159,9 +1159,17 @@ XGB_DLL int XGBoosterFeatureScore(BoosterHandle handle, char const *json_config,
|
||||
custom_feature_names = get<Array const>(config["feature_names"]);
|
||||
}
|
||||
|
||||
auto& scores = learner->GetThreadLocal().ret_vec_float;
|
||||
std::vector<int32_t> tree_idx;
|
||||
if (!IsA<Null>(config["tree_idx"])) {
|
||||
auto j_tree_idx = get<Array const>(config["tree_idx"]);
|
||||
for (auto const &idx : j_tree_idx) {
|
||||
tree_idx.push_back(get<Integer const>(idx));
|
||||
}
|
||||
}
|
||||
|
||||
auto &scores = learner->GetThreadLocal().ret_vec_float;
|
||||
std::vector<bst_feature_t> features;
|
||||
learner->CalcFeatureScore(importance, &features, &scores);
|
||||
learner->CalcFeatureScore(importance, common::Span<int32_t const>(tree_idx), &features, &scores);
|
||||
|
||||
auto n_features = learner->GetNumFeature();
|
||||
GenerateFeatureMap(learner, custom_feature_names, n_features, &feature_map);
|
||||
|
||||
@@ -232,9 +232,11 @@ class GBLinear : public GradientBooster {
|
||||
}
|
||||
|
||||
void FeatureScore(std::string const &importance_type,
|
||||
common::Span<int32_t const> trees,
|
||||
std::vector<bst_feature_t> *out_features,
|
||||
std::vector<float> *out_scores) const override {
|
||||
CHECK(!model_.weight.empty()) << "Model is not initialized";
|
||||
CHECK(trees.empty()) << "gblinear doesn't support number of trees for feature importance.";
|
||||
CHECK_EQ(importance_type, "weight")
|
||||
<< "gblinear only has `weight` defined for feature importance.";
|
||||
out_features->resize(this->learner_model_param_->num_feature, 0);
|
||||
|
||||
@@ -300,18 +300,28 @@ class GBTree : public GradientBooster {
|
||||
}
|
||||
}
|
||||
|
||||
void FeatureScore(std::string const &importance_type,
|
||||
std::vector<bst_feature_t> *features,
|
||||
std::vector<float> *scores) const override {
|
||||
void FeatureScore(std::string const& importance_type, common::Span<int32_t const> trees,
|
||||
std::vector<bst_feature_t>* features,
|
||||
std::vector<float>* scores) const override {
|
||||
// Because feature with no importance doesn't appear in the return value so
|
||||
// we need to set up another pair of vectors to store the values during
|
||||
// computation.
|
||||
std::vector<size_t> split_counts(this->model_.learner_model_param->num_feature, 0);
|
||||
std::vector<float> gain_map(this->model_.learner_model_param->num_feature, 0);
|
||||
std::vector<int32_t> tree_idx;
|
||||
if (trees.empty()) {
|
||||
tree_idx.resize(this->model_.trees.size());
|
||||
std::iota(tree_idx.begin(), tree_idx.end(), 0);
|
||||
trees = common::Span<int32_t const>(tree_idx);
|
||||
}
|
||||
|
||||
auto total_n_trees = model_.trees.size();
|
||||
auto add_score = [&](auto fn) {
|
||||
for (auto const &p_tree : model_.trees) {
|
||||
for (auto idx : trees) {
|
||||
CHECK_LE(idx, total_n_trees) << "Invalid tree index.";
|
||||
auto const& p_tree = model_.trees[idx];
|
||||
p_tree->WalkTree([&](bst_node_t nidx) {
|
||||
auto const &node = (*p_tree)[nidx];
|
||||
auto const& node = (*p_tree)[nidx];
|
||||
if (!node.IsLeaf()) {
|
||||
split_counts[node.SplitIndex()]++;
|
||||
fn(p_tree, nidx, node.SplitIndex());
|
||||
|
||||
@@ -1212,11 +1212,10 @@ class LearnerImpl : public LearnerIO {
|
||||
*out_preds = &out_predictions.predictions;
|
||||
}
|
||||
|
||||
void CalcFeatureScore(std::string const &importance_type,
|
||||
std::vector<bst_feature_t> *features,
|
||||
std::vector<float> *scores) override {
|
||||
void CalcFeatureScore(std::string const& importance_type, common::Span<int32_t const> trees,
|
||||
std::vector<bst_feature_t>* features, std::vector<float>* scores) override {
|
||||
this->Configure();
|
||||
gbm_->FeatureScore(importance_type, features, scores);
|
||||
gbm_->FeatureScore(importance_type, trees, features, scores);
|
||||
}
|
||||
|
||||
const std::map<std::string, std::string>& GetConfigurationArguments() const override {
|
||||
|
||||
Reference in New Issue
Block a user