Implement feature score for linear model. (#7048)

* Add feature score support for linear model.
* Port R interface to the new implementation.
* Add linear model support in Python.

Co-authored-by: Philip Hyunsu Cho <chohyu01@cs.washington.edu>
This commit is contained in:
Jiaming Yuan
2021-06-25 14:34:02 +08:00
committed by GitHub
parent b2d300e727
commit 663136aa08
18 changed files with 367 additions and 232 deletions

View File

@@ -1195,10 +1195,13 @@ XGB_DLL int XGBoosterGetStrFeatureInfo(BoosterHandle handle, const char *field,
const char ***out_features);
/*!
* \brief Calculate feature scores for tree models.
 * \brief Calculate feature scores for tree models. When used on a linear model, only the
 * `weight` importance type is defined, and the output scores are a row-major matrix with shape
 * [n_features, n_classes] for multi-class models. For tree models, out_n_features is always
 * equal to the number of output scores, and multiple importance types are supported.
*
* \param handle An instance of Booster
* \param json_config Parameters for computing scores. Accepted JSON keys are:
* \param handle An instance of Booster
* \param json_config Parameters for computing scores. Accepted JSON keys are:
* - importance_type: A JSON string with following possible values:
* * 'weight': the number of times a feature is used to split the data across all trees.
* * 'gain': the average gain across all splits the feature is used in.
@@ -1206,15 +1209,20 @@ XGB_DLL int XGBoosterGetStrFeatureInfo(BoosterHandle handle, const char *field,
* * 'total_gain': the total gain across all splits the feature is used in.
* * 'total_cover': the total coverage across all splits the feature is used in.
* - feature_map: An optional JSON string with URI or path to the feature map file.
* - feature_names: An optional JSON array with string names for each feature.
*
* \param out_length Length of output arrays.
* \param out_features An array of string as feature names, ordered the same as output scores.
* \param out_scores An array of floating point as feature scores.
* \param out_n_features Length of output feature names.
* \param out_features An array of string as feature names, ordered the same as output scores.
* \param out_dim Dimension of output feature scores.
* \param out_shape Shape of output feature scores with length of `out_dim`.
* \param out_scores An array of floating point as feature scores with shape of `out_shape`.
*
* \return 0 when success, -1 when failure happens
*/
XGB_DLL int XGBoosterFeatureScore(BoosterHandle handle, const char *json_config,
bst_ulong *out_length,
const char ***out_features,
float **out_scores);
bst_ulong *out_n_features,
char const ***out_features,
bst_ulong *out_dim,
bst_ulong const **out_shape,
float const **out_scores);
#endif // XGBOOST_C_API_H_

View File

@@ -184,9 +184,7 @@ class GradientBooster : public Model, public Configurable {
virtual void FeatureScore(std::string const &importance_type,
std::vector<bst_feature_t> *features,
std::vector<float> *scores) const {
LOG(FATAL) << "`feature_score` is not implemented for current booster.";
}
std::vector<float> *scores) const = 0;
/*!
* \brief Whether the current booster uses GPU.
*/

View File

@@ -13,6 +13,7 @@
#include <array>
#include <algorithm>
#include <utility>
#include <vector>
namespace xgboost {
/*!
@@ -59,6 +60,13 @@ template <typename T> class MatrixView {
strides_[0] = shape[1];
strides_[1] = 1;
}
// Build a 2-D view over a CPU-resident std::vector; the vector must hold
// exactly shape[0] * shape[1] elements (row-major layout).
MatrixView(std::vector<T> *vec, std::array<size_t, 2> shape)
    : device_{GenericParameter::kCpuId}, values_{*vec} {
  CHECK_EQ(vec->size(), shape[0] * shape[1]);
  shape_[0] = shape[0];
  shape_[1] = shape[1];
  // Row-major strides: one full row per step in dim 0, contiguous in dim 1.
  strides_[0] = shape[1];
  strides_[1] = 1;
}
MatrixView(HostDeviceVector<std::remove_const_t<T>> const *vec,
std::array<size_t, 2> shape, int32_t device)
: device_{device}, values_{InferValues(vec, device)} {