[breaking] Save booster feature info in JSON, remove feature name generation. (#6605)

* Save feature info in booster in JSON model.
* [breaking] Remove automatic feature name generation in `DMatrix`.

This PR is to enable reliable feature validation in Python package.
This commit is contained in:
Jiaming Yuan
2021-02-25 18:54:16 +08:00
committed by GitHub
parent b6167cd2ff
commit 9da2287ab8
12 changed files with 363 additions and 36 deletions

View File

@@ -1132,4 +1132,46 @@ XGB_DLL int XGBoosterSetAttr(BoosterHandle handle,
XGB_DLL int XGBoosterGetAttrNames(BoosterHandle handle,
bst_ulong* out_len,
const char*** out);
/*!
* \brief Set string encoded feature info in Booster, similar to the feature
* info in DMatrix.
*
* Accepted fields are:
* - feature_name
* - feature_type
*
* \param handle An instance of Booster
* \param field Feild name
* \param features Pointer to array of strings.
* \param size Size of `features` pointer (number of strings passed in).
*
* \return 0 when success, -1 when failure happens
*/
XGB_DLL int XGBoosterSetStrFeatureInfo(BoosterHandle handle, const char *field,
const char **features,
const bst_ulong size);
/*!
* \brief Get string encoded feature info from Booster, similar to feature info
* in DMatrix.
*
* Accepted fields are:
* - feature_name
* - feature_type
*
* Caller is responsible for copying out the data, before next call to any API
* function of XGBoost.
*
* \param handle An instance of Booster
* \param field Feild name
* \param size Size of output pointer `features` (number of strings returned).
* \param out_features Address of a pointer to array of strings. Result is stored in
* thread local memory.
*
* \return 0 when success, -1 when failure happens
*/
XGB_DLL int XGBoosterGetStrFeatureInfo(BoosterHandle handle, const char *field,
bst_ulong *len,
const char ***out_features);
#endif // XGBOOST_C_API_H_

View File

@@ -213,6 +213,27 @@ class Learner : public Model, public Configurable, public dmlc::Serializable {
* \return vector of attribute name strings.
*/
virtual std::vector<std::string> GetAttrNames() const = 0;
/*!
* \brief Set the feature names for current booster.
* \param fn Input feature names
*/
virtual void SetFeatureNames(std::vector<std::string> const& fn) = 0;
/*!
* \brief Get the feature names for current booster.
* \param fn Output feature names
*/
virtual void GetFeatureNames(std::vector<std::string>* fn) const = 0;
/*!
* \brief Set the feature types for current booster.
* \param ft Input feature types.
*/
virtual void SetFeatureTypes(std::vector<std::string> const& ft) = 0;
/*!
* \brief Get the feature types for current booster.
* \param fn Output feature types
*/
virtual void GetFeatureTypes(std::vector<std::string>* ft) const = 0;
/*!
* \return whether the model allow lazy checkpoint in rabit.
*/