From d83c8180001370e7a8e7fa510bc4f760c963d5cd Mon Sep 17 00:00:00 2001 From: Philip Hyunsu Cho Date: Thu, 25 Oct 2018 11:12:41 -0700 Subject: [PATCH] Recommend pickling as the way to save XGBClassifier / XGBRegressor / XGBRanker (#3829) The `save_model()` and `load_model()` methods only save the part of the model that's common to all language interfaces and do not preserve Python-specific attributes, such as `feature_names`. More crucially, the label encoder is not preserved either; this is needed for the scikit-learn wrapper, since you may have string labels. Fix: Explicitly recommend pickling as the way to save scikit-learn model objects. --- python-package/xgboost/sklearn.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/python-package/xgboost/sklearn.py b/python-package/xgboost/sklearn.py index fd6d96d54..dcc4e85a9 100644 --- a/python-package/xgboost/sklearn.py +++ b/python-package/xgboost/sklearn.py @@ -241,6 +241,13 @@ class XGBModel(XGBModelBase): """ Save the model to a file. + The model is saved in an XGBoost internal binary format which is + universal among the various XGBoost interfaces. Auxiliary attributes of + the Python Booster object (such as feature names) will not be saved. + Label encodings (text labels to numeric labels) will also be lost. + **If you are using only the Python interface, we recommend pickling the + model object for best results.** + Parameters ---------- fname : string @@ -252,6 +259,13 @@ class XGBModel(XGBModelBase): """ Load the model from a file. + The model is loaded from an XGBoost internal binary format which is + universal among the various XGBoost interfaces. Auxiliary attributes of + the Python Booster object (such as feature names) will not be loaded. + Label encodings (text labels to numeric labels) will also be lost. + **If you are using only the Python interface, we recommend pickling the + model object for best results.** + Parameters ---------- fname : string or a memory buffer