[breaking] Remove duplicated predict functions, Fix attributes IO. (#6593)

* Fix attributes not being restored.
* Rename all `data` to `X`. [breaking]

parent 7f4d3a91b9 · commit 0027220aa0
@@ -1741,6 +1741,13 @@ class Booster(object):
         else:
             raise TypeError('Unknown file type: ', fname)

+        if self.attr("best_iteration") is not None:
+            self.best_iteration = int(self.attr("best_iteration"))
+        if self.attr("best_score") is not None:
+            self.best_score = float(self.attr("best_score"))
+        if self.attr("best_ntree_limit") is not None:
+            self.best_ntree_limit = int(self.attr("best_ntree_limit"))
+
     def num_boosted_rounds(self) -> int:
         '''Get number of boosted rounds. For gblinear this is reset to 0 after
         serializing the model.
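The hunk above is what lets a booster remember its early-stopping state across serialisation: `load_model` now reads the Python-side fields back from the stored booster attributes. A minimal sketch of the resulting behaviour, assuming a toy binary task; the data and file name are illustrative, not part of the patch:

    import numpy as np
    import xgboost as xgb

    rng = np.random.RandomState(0)
    X, y = rng.randn(200, 4), rng.randint(0, 2, 200)
    dtrain = xgb.DMatrix(X, label=y)
    bst = xgb.train({'objective': 'binary:logistic'}, dtrain,
                    num_boost_round=10, evals=[(dtrain, 'train')],
                    early_stopping_rounds=3)
    bst.save_model('model.json')

    loaded = xgb.Booster()
    loaded.load_model('model.json')
    # Before this patch the Python-side attributes were lost on load;
    # now they are restored from the serialised booster attributes.
    assert loaded.best_iteration == bst.best_iteration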
@@ -682,10 +682,16 @@ class XGBModel(XGBModelBase):
         self._set_evaluation_result(evals_result)
         return self

-    def predict(self, data, output_margin=False, ntree_limit=None,
-                validate_features=True, base_margin=None):
+    def predict(
+        self,
+        X,
+        output_margin=False,
+        ntree_limit=None,
+        validate_features=True,
+        base_margin=None
+    ):
         """
-        Predict with `data`.
+        Predict with `X`.

         .. note:: This function is not thread safe.

@@ -699,7 +705,7 @@ class XGBModel(XGBModelBase):

         Parameters
         ----------
-        data : array_like
+        X : array_like
             Data to predict with
         output_margin : bool
             Whether to output the raw untransformed margin value.
@@ -718,16 +724,21 @@ class XGBModel(XGBModelBase):
         prediction : numpy array
         """
         # pylint: disable=missing-docstring,invalid-name
-        test_dmatrix = DMatrix(data, base_margin=base_margin,
+        test_dmatrix = DMatrix(X, base_margin=base_margin,
                                missing=self.missing, nthread=self.n_jobs)
         # get ntree_limit to use - if none specified, default to
         # best_ntree_limit if defined, otherwise 0.
         if ntree_limit is None:
-            ntree_limit = getattr(self, "best_ntree_limit", 0)
-        return self.get_booster().predict(test_dmatrix,
-                                          output_margin=output_margin,
-                                          ntree_limit=ntree_limit,
-                                          validate_features=validate_features)
+            try:
+                ntree_limit = self.best_ntree_limit
+            except AttributeError:
+                ntree_limit = 0
+        return self.get_booster().predict(
+            test_dmatrix,
+            output_margin=output_margin,
+            ntree_limit=ntree_limit,
+            validate_features=validate_features
+        )

     def apply(self, X, ntree_limit=0):
         """Return the predicted leaf every tree for each sample.
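After the rename, `X` is the first parameter of the sklearn wrapper's `predict`. Positional calls are unaffected; keyword callers must migrate, hence the [breaking] tag. A short usage sketch (the dataset choice is illustrative):

    from sklearn.datasets import load_breast_cancer
    import xgboost as xgb

    X, y = load_breast_cancer(return_X_y=True)
    model = xgb.XGBClassifier(n_estimators=10).fit(X, y)

    preds = model.predict(X)    # positional calls keep working
    preds = model.predict(X=X)  # keyword form; was `data=X` before this commit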
@@ -1037,50 +1048,21 @@ class XGBClassifier(XGBModel, XGBClassifierBase):
         'Fit gradient boosting model',
         'Fit gradient boosting classifier', 1)

-    def predict(self, data, output_margin=False, ntree_limit=None,
-                validate_features=True, base_margin=None):
-        """
-        Predict with `data`.
-
-        .. note:: This function is not thread safe.
-
-        For each booster object, predict can only be called from one thread.
-        If you want to run prediction using multiple thread, call
-        ``xgb.copy()`` to make copies of model object and then call
-        ``predict()``.
-
-        .. code-block:: python
-
-            preds = bst.predict(dtest, ntree_limit=num_round)
-
-        Parameters
-        ----------
-        data : array_like
-            Feature matrix.
-        output_margin : bool
-            Whether to output the raw untransformed margin value.
-        ntree_limit : int
-            Limit number of trees in the prediction; defaults to
-            best_ntree_limit if defined (i.e. it has been trained with early
-            stopping), otherwise 0 (use all trees).
-        validate_features : bool
-            When this is True, validate that the Booster's and data's
-            feature_names are identical. Otherwise, it is assumed that the
-            feature_names are the same.
-
-        Returns
-        -------
-        prediction : numpy array
-        """
-        test_dmatrix = DMatrix(data, base_margin=base_margin,
-                               missing=self.missing, nthread=self.n_jobs)
-        if ntree_limit is None:
-            ntree_limit = getattr(self, "best_ntree_limit", 0)
-        class_probs = self.get_booster().predict(
-            test_dmatrix,
-            output_margin=output_margin,
-            ntree_limit=ntree_limit,
-            validate_features=validate_features)
+    def predict(
+        self,
+        X,
+        output_margin=False,
+        ntree_limit=None,
+        validate_features=True,
+        base_margin=None
+    ):
+        class_probs = super().predict(
+            X=X,
+            output_margin=output_margin,
+            ntree_limit=ntree_limit,
+            validate_features=validate_features,
+            base_margin=base_margin
+        )
         if output_margin:
             # If output_margin is active, simply return the scores
             return class_probs
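Delegating to `super().predict()` removes the copy-pasted DMatrix construction and the duplicated docstring; the subclass keeps only the classifier-specific post-processing. Continuing the sketch above, the `output_margin` early return means:

    margins = model.predict(X, output_margin=True)  # raw scores, returned as-is
    labels = model.predict(X)                       # post-processed class labels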
@@ -1125,13 +1107,13 @@ class XGBClassifier(XGBModel, XGBClassifierBase):
         a numpy array of shape array-like of shape (n_samples, n_classes) with the
         probability of each data example being of a given class.
         """
-        test_dmatrix = DMatrix(X, base_margin=base_margin,
-                               missing=self.missing, nthread=self.n_jobs)
-        if ntree_limit is None:
-            ntree_limit = getattr(self, "best_ntree_limit", 0)
-        class_probs = self.get_booster().predict(test_dmatrix,
-                                                 ntree_limit=ntree_limit,
-                                                 validate_features=validate_features)
+        class_probs = super().predict(
+            X=X,
+            output_margin=False,
+            ntree_limit=ntree_limit,
+            validate_features=validate_features,
+            base_margin=base_margin
+        )
         return _cls_predict_proba(self.objective, class_probs, np.vstack)

     def evals_result(self):
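`predict_proba` now flows through the same shared path, with `output_margin=False` pinned so scores are always transformed into probabilities. Continuing the sketch:

    proba = model.predict_proba(X)
    assert proba.shape == (X.shape[0], model.n_classes_)  # (n_samples, n_classes)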
@@ -1493,18 +1475,3 @@ class XGBRanker(XGBModel, XGBRankerMixIn):
         self.objective = params["objective"]
         self._set_evaluation_result(evals_result)
         return self
-
-    def predict(self, data, output_margin=False,
-                ntree_limit=0, validate_features=True, base_margin=None):
-
-        test_dmatrix = DMatrix(data, base_margin=base_margin,
-                               missing=self.missing)
-        if ntree_limit is None:
-            ntree_limit = getattr(self, "best_ntree_limit", 0)
-
-        return self.get_booster().predict(test_dmatrix,
-                                          output_margin=output_margin,
-                                          ntree_limit=ntree_limit,
-                                          validate_features=validate_features)
-
-    predict.__doc__ = XGBModel.predict.__doc__
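With its override deleted, `XGBRanker` now inherits `XGBModel.predict` unchanged, so the ranker also gains the renamed `X` parameter and the shared `ntree_limit` fallback instead of maintaining a third copy of the same body.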
@@ -88,12 +88,9 @@ def _train_internal(params, dtrain,
     if evals_result is not None and is_new_callback:
         evals_result.update(callbacks.history)

-    if bst.attr('best_score') is not None:
-        bst.best_score = float(bst.attr('best_score'))
-        bst.best_iteration = int(bst.attr('best_iteration'))
-    else:
-        bst.best_iteration = bst.num_boosted_rounds() - 1
-
+    # These should be moved into callback functions `after_training`, but until old
+    # callbacks are removed, the train function is the only place for setting the
+    # attributes.
     config = json.loads(bst.save_config())
     booster = config['learner']['gradient_booster']['name']
     if booster == 'gblinear':
@@ -114,7 +111,20 @@ def _train_internal(params, dtrain,

     num_groups = int(config['learner']['learner_model_param']['num_class'])
     num_groups = 1 if num_groups == 0 else num_groups
-    bst.best_ntree_limit = ((bst.best_iteration + 1) * num_parallel_tree * num_groups)
+    if bst.attr('best_score') is not None:
+        bst.best_score = float(bst.attr('best_score'))
+        bst.best_iteration = int(bst.attr('best_iteration'))
+        bst.set_attr(
+            best_ntree_limit=str(
+                (bst.best_iteration + 1) * num_parallel_tree * num_groups
+            )
+        )
+        bst.best_ntree_limit = int(bst.attr("best_ntree_limit"))
+    else:
+        # Due to compatibility with version older than 1.4, these attributes are added
+        # to Python object even if early stopping is not used.
+        bst.best_iteration = bst.num_boosted_rounds() - 1
+        bst.best_ntree_limit = (bst.best_iteration + 1) * num_parallel_tree * num_groups

     # Copy to serialise and unserialise booster to reset state and free
     # training memory
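The fan-out arithmetic works because each boosting round grows `num_parallel_tree * num_groups` trees: with `num_parallel_tree=4`, `num_class=3`, and `best_iteration=9`, the limit comes out to (9 + 1) * 4 * 3 = 120 trees. Storing it with `set_attr` (as a string, since booster attributes are string-valued) is what allows the new `load_model` code in the first hunk to restore it after deserialisation.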
@@ -148,15 +158,16 @@ def train(params, dtrain, num_boost_round=10, evals=(), obj=None, feval=None,
         Activates early stopping. Validation metric needs to improve at least once in
         every **early_stopping_rounds** round(s) to continue training.
         Requires at least one item in **evals**.
-        The method returns the model from the last iteration (not the best one).
-        If there's more than one item in **evals**, the last entry will be used
-        for early stopping.
+        The method returns the model from the last iteration (not the best one). Use
+        custom callback or model slicing if the best model is desired.
+        If there's more than one item in **evals**, the last entry will be used for early
+        stopping.
         If there's more than one metric in the **eval_metric** parameter given in
         **params**, the last metric will be used for early stopping.
         If early stopping occurs, the model will have three additional fields:
-        ``bst.best_score``, ``bst.best_iteration`` and ``bst.best_ntree_limit``.
-        (Use ``bst.best_ntree_limit`` to get the correct value if
-        ``num_parallel_tree`` and/or ``num_class`` appears in the parameters)
+        ``bst.best_score``, ``bst.best_iteration`` and ``bst.best_ntree_limit``. (Use
+        ``bst.best_ntree_limit`` to get the correct value if ``num_parallel_tree`` and/or
+        ``num_class`` appears in the parameters)
     evals_result: dict
         This dictionary stores the evaluation results of all the items in watchlist.

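The "model slicing" the reworded docstring points to can recover the best model after training. A minimal sketch, assuming `bst` was trained with early stopping and `dtest` is an existing DMatrix (both illustrative):

    best = bst[: bst.best_iteration + 1]  # slice the booster up to the best iteration
    preds = best.predict(dtest)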
@@ -341,6 +341,25 @@ class TestModels:
                       'objective': 'multi:softmax'}
         validate_model(parameters)

+    @pytest.mark.skipif(**tm.no_sklearn())
+    def test_attributes(self):
+        from sklearn.datasets import load_iris
+        X, y = load_iris(return_X_y=True)
+        cls = xgb.XGBClassifier(n_estimators=2)
+        cls.fit(X, y, early_stopping_rounds=1, eval_set=[(X, y)])
+        assert cls.get_booster().best_ntree_limit == 2 * cls.n_classes_
+        assert cls.best_ntree_limit == cls.get_booster().best_ntree_limit
+
+        with tempfile.TemporaryDirectory() as tmpdir:
+            path = os.path.join(tmpdir, "cls.json")
+            cls.save_model(path)
+
+            cls = xgb.XGBClassifier(n_estimators=2)
+            cls.load_model(path)
+            assert cls.get_booster().best_ntree_limit == 2 * cls.n_classes_
+            assert cls.best_ntree_limit == cls.get_booster().best_ntree_limit
+
+    @pytest.mark.skipif(**tm.no_sklearn())
     @pytest.mark.parametrize('booster', ['gbtree', 'dart'])
     def test_slice(self, booster):
         from sklearn.datasets import make_classification
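The expected value `2 * cls.n_classes_` follows from the formula in `_train_internal`: evaluating on the training set means the metric keeps improving, so `best_iteration` stays at 1 (the last of the two rounds), and with one tree per class per round on the three-class iris data, `best_ntree_limit = (1 + 1) * 1 * 3 = 6 == 2 * n_classes_`. The second half of the test is the actual regression check: the attributes must survive a `save_model`/`load_model` round trip.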