Implement feature score in GBTree. (#7041)

* Categorical data support.
* Eliminate text parsing during feature score computation.
This commit is contained in:
Jiaming Yuan
2021-06-18 11:53:16 +08:00
committed by GitHub
parent dcd84b3979
commit 7dd29ffd47
10 changed files with 285 additions and 84 deletions

View File

@@ -1,5 +1,5 @@
/*!
* Copyright (c) 2015~2020 by Contributors
* Copyright (c) 2015~2021 by Contributors
* \file c_api.h
* \author Tianqi Chen
* \brief C API of XGBoost, used for interfacing to other languages.
@@ -1193,4 +1193,28 @@ XGB_DLL int XGBoosterSetStrFeatureInfo(BoosterHandle handle, const char *field,
XGB_DLL int XGBoosterGetStrFeatureInfo(BoosterHandle handle, const char *field,
bst_ulong *len,
const char ***out_features);
/*!
* \brief Calculate feature scores for tree models.
*
* \param handle An instance of Booster
* \param json_config Parameters for computing scores, encoded as a JSON document.
* Accepted JSON keys are:
* - importance_type: A JSON string with one of the following possible values:
* * 'weight': the number of times a feature is used to split the data across all trees.
* * 'gain': the average gain across all splits the feature is used in.
* * 'cover': the average coverage across all splits the feature is used in.
* * 'total_gain': the total gain across all splits the feature is used in.
* * 'total_cover': the total coverage across all splits the feature is used in.
* - feature_map: An optional JSON string with URI or path to the feature map file.
*
* \param out_length Length of both output arrays (out_features and out_scores).
* \param out_features An array of strings holding the feature names, ordered the same as
* the output scores.
* \param out_scores An array of floating point values holding the feature scores.
*
* \note NOTE(review): ownership and lifetime of the returned arrays are not stated in
* this header -- confirm against the implementation before documenting them here.
*
* \return 0 when success, -1 when failure happens
*/
XGB_DLL int XGBoosterFeatureScore(BoosterHandle handle, const char *json_config,
bst_ulong *out_length,
const char ***out_features,
float **out_scores);
#endif // XGBOOST_C_API_H_

View File

@@ -181,6 +181,12 @@ class GradientBooster : public Model, public Configurable {
virtual std::vector<std::string> DumpModel(const FeatureMap& fmap,
bool with_stats,
std::string format) const = 0;
/*!
* \brief Compute per-feature scores for this booster.
*
* The default implementation computes nothing: it only emits a LOG(FATAL) message
* stating that feature score is not implemented for the current booster. Being
* virtual, it can be overridden by boosters that support feature scores.
*
* \param importance_type Name of the score to compute.
* \param features Output vector of scored features; not touched by this default
* implementation.
* \param scores Output vector of scores; not touched by this default implementation.
* Presumably aligned element-wise with `features` -- TODO confirm
* against an overriding implementation.
*/
virtual void FeatureScore(std::string const &importance_type,
std::vector<bst_feature_t> *features,
std::vector<float> *scores) const {
// None of the parameters are read here; this stub only reports the missing feature.
LOG(FATAL) << "`feature_score` is not implemented for current booster.";
}
/*!
* \brief Whether the current booster uses GPU.
*/

View File

@@ -152,6 +152,13 @@ class Learner : public Model, public Configurable, public dmlc::Serializable {
HostDeviceVector<bst_float> **out_preds,
uint32_t layer_begin, uint32_t layer_end) = 0;
/*!
* \brief Calculate feature score. See doc in C API for outputs.
*
* \param importance_type Name of the score to compute.
* \param features Output vector receiving the scored features (as bst_feature_t).
* \param scores Output vector receiving the scores; presumably aligned element-wise
* with `features` -- TODO confirm against the implementation.
*/
virtual void CalcFeatureScore(std::string const &importance_type,
std::vector<bst_feature_t> *features,
std::vector<float> *scores) = 0;
/*!
* \brief Get number of boosted rounds from gradient booster.
*/