Implement feature score in GBTree. (#7041)
* Categorical data support. * Eliminate text parsing during feature score computation.
This commit is contained in:
@@ -1,5 +1,5 @@
|
||||
/*!
|
||||
* Copyright (c) 2015~2020 by Contributors
|
||||
* Copyright (c) 2015~2021 by Contributors
|
||||
* \file c_api.h
|
||||
* \author Tianqi Chen
|
||||
* \brief C API of XGBoost, used for interfacing to other languages.
|
||||
@@ -1193,4 +1193,28 @@ XGB_DLL int XGBoosterSetStrFeatureInfo(BoosterHandle handle, const char *field,
|
||||
XGB_DLL int XGBoosterGetStrFeatureInfo(BoosterHandle handle, const char *field,
|
||||
bst_ulong *len,
|
||||
const char ***out_features);
|
||||
|
||||
/*!
|
||||
* \brief Calculate feature scores for tree models.
|
||||
*
|
||||
* \param handle An instance of Booster
|
||||
* \param json_config Parameters for computing scores. Accepted JSON keys are:
|
||||
* - importance_type: A JSON string with the following possible values:
|
||||
* * 'weight': the number of times a feature is used to split the data across all trees.
|
||||
* * 'gain': the average gain across all splits the feature is used in.
|
||||
* * 'cover': the average coverage across all splits the feature is used in.
|
||||
* * 'total_gain': the total gain across all splits the feature is used in.
|
||||
* * 'total_cover': the total coverage across all splits the feature is used in.
|
||||
* - feature_map: An optional JSON string with URI or path to the feature map file.
|
||||
*
|
||||
* \param out_length Length of output arrays.
|
||||
* \param out_features An array of strings holding the feature names, in the same order as the output scores.
|
||||
* \param out_scores An array of floating point values holding the feature scores.
|
||||
*
|
||||
* \return 0 when success, -1 when failure happens
|
||||
*/
|
||||
XGB_DLL int XGBoosterFeatureScore(BoosterHandle handle, const char *json_config,
|
||||
bst_ulong *out_length,
|
||||
const char ***out_features,
|
||||
float **out_scores);
|
||||
#endif // XGBOOST_C_API_H_
|
||||
|
||||
@@ -181,6 +181,12 @@ class GradientBooster : public Model, public Configurable {
|
||||
virtual std::vector<std::string> DumpModel(const FeatureMap& fmap,
|
||||
bool with_stats,
|
||||
std::string format) const = 0;
|
||||
|
||||
/*!
 * \brief Compute feature scores for this booster.
 *
 * This default implementation computes nothing: it only emits a FATAL-level log
 * message saying that `feature_score` is not implemented for the current booster.
 * None of the three arguments is read or written here.
 * NOTE(review): presumably boosters that support feature scores override this
 * method -- confirm against the derived classes.
 *
 * \param importance_type Kind of score requested (unused by this default).
 * \param features Presumably receives the indices of the scored features; untouched by this default.
 * \param scores Presumably receives one score per entry of `features`; untouched by this default.
 */
virtual void FeatureScore(std::string const &importance_type,
|
||||
std::vector<bst_feature_t> *features,
|
||||
std::vector<float> *scores) const {
|
||||
LOG(FATAL) << "`feature_score` is not implemented for current booster.";
|
||||
}
|
||||
/*!
|
||||
* \brief Whether the current booster uses GPU.
|
||||
*/
|
||||
|
||||
@@ -152,6 +152,13 @@ class Learner : public Model, public Configurable, public dmlc::Serializable {
|
||||
HostDeviceVector<bst_float> **out_preds,
|
||||
uint32_t layer_begin, uint32_t layer_end) = 0;
|
||||
|
||||
/*!
|
||||
* \brief Calculate feature score. See doc in C API for outputs.
|
||||
*
* Pure virtual: the implementation is supplied by the concrete learner.
*
* \param importance_type Type of score to compute. NOTE(review): presumably one of the
*        `importance_type` values listed in the C API doc of XGBoosterFeatureScore -- confirm.
* \param features Output. NOTE(review): presumably the indices of the scored features,
*        paired element-wise with `scores` -- confirm.
* \param scores Output. The computed feature scores.
*/
|
||||
virtual void CalcFeatureScore(std::string const &importance_type,
|
||||
std::vector<bst_feature_t> *features,
|
||||
std::vector<float> *scores) = 0;
|
||||
|
||||
/*!
|
||||
* \brief Get number of boosted rounds from gradient booster.
|
||||
*/
|
||||
|
||||
Reference in New Issue
Block a user