Handle np integer in model slice and prediction. (#10007)
This commit is contained in:
@@ -36,6 +36,11 @@ PandasDType = Any # real type is pandas.core.dtypes.base.ExtensionDtype
|
||||
|
||||
FloatCompatible = Union[float, np.float32, np.float64]
|
||||
|
||||
# typing.SupportsInt is not suitable here since floating point values are convertible to
|
||||
# integers as well.
|
||||
Integer = Union[int, np.integer]
|
||||
IterationRange = Tuple[Integer, Integer]
|
||||
|
||||
# callables
|
||||
FPreProcCallable = Callable
|
||||
|
||||
|
||||
@@ -48,6 +48,8 @@ from ._typing import (
|
||||
FeatureInfo,
|
||||
FeatureNames,
|
||||
FeatureTypes,
|
||||
Integer,
|
||||
IterationRange,
|
||||
ModelIn,
|
||||
NumpyOrCupy,
|
||||
TransformedData,
|
||||
@@ -1812,19 +1814,25 @@ class Booster:
|
||||
state["handle"] = handle
|
||||
self.__dict__.update(state)
|
||||
|
||||
def __getitem__(self, val: Union[int, tuple, slice]) -> "Booster":
|
||||
def __getitem__(self, val: Union[Integer, tuple, slice]) -> "Booster":
|
||||
"""Get a slice of the tree-based model.
|
||||
|
||||
.. versionadded:: 1.3.0
|
||||
|
||||
"""
|
||||
if isinstance(val, int):
|
||||
val = slice(val, val + 1)
|
||||
# convert to slice for all other types
|
||||
if isinstance(val, (np.integer, int)):
|
||||
val = slice(int(val), int(val + 1))
|
||||
if isinstance(val, type(Ellipsis)):
|
||||
val = slice(0, 0)
|
||||
if isinstance(val, tuple):
|
||||
raise ValueError("Only supports slicing through 1 dimension.")
|
||||
# All supported types are now slice
|
||||
# FIXME(jiamingy): Use `types.EllipsisType` once Python 3.10 is used.
|
||||
if not isinstance(val, slice):
|
||||
msg = _expect((int, slice), type(val))
|
||||
msg = _expect((int, slice, np.integer, type(Ellipsis)), type(val))
|
||||
raise TypeError(msg)
|
||||
|
||||
if isinstance(val.start, type(Ellipsis)) or val.start is None:
|
||||
start = 0
|
||||
else:
|
||||
@@ -2246,12 +2254,13 @@ class Booster:
|
||||
pred_interactions: bool = False,
|
||||
validate_features: bool = True,
|
||||
training: bool = False,
|
||||
iteration_range: Tuple[int, int] = (0, 0),
|
||||
iteration_range: IterationRange = (0, 0),
|
||||
strict_shape: bool = False,
|
||||
) -> np.ndarray:
|
||||
"""Predict with data. The full model will be used unless `iteration_range` is specified,
|
||||
meaning user have to either slice the model or use the ``best_iteration``
|
||||
attribute to get prediction from best model returned from early stopping.
|
||||
"""Predict with data. The full model will be used unless `iteration_range` is
|
||||
specified, meaning user have to either slice the model or use the
|
||||
``best_iteration`` attribute to get prediction from best model returned from
|
||||
early stopping.
|
||||
|
||||
.. note::
|
||||
|
||||
@@ -2336,8 +2345,8 @@ class Booster:
|
||||
args = {
|
||||
"type": 0,
|
||||
"training": training,
|
||||
"iteration_begin": iteration_range[0],
|
||||
"iteration_end": iteration_range[1],
|
||||
"iteration_begin": int(iteration_range[0]),
|
||||
"iteration_end": int(iteration_range[1]),
|
||||
"strict_shape": strict_shape,
|
||||
}
|
||||
|
||||
@@ -2373,7 +2382,7 @@ class Booster:
|
||||
def inplace_predict(
|
||||
self,
|
||||
data: DataType,
|
||||
iteration_range: Tuple[int, int] = (0, 0),
|
||||
iteration_range: IterationRange = (0, 0),
|
||||
predict_type: str = "value",
|
||||
missing: float = np.nan,
|
||||
validate_features: bool = True,
|
||||
@@ -2439,8 +2448,8 @@ class Booster:
|
||||
args = make_jcargs(
|
||||
type=1 if predict_type == "margin" else 0,
|
||||
training=False,
|
||||
iteration_begin=iteration_range[0],
|
||||
iteration_end=iteration_range[1],
|
||||
iteration_begin=int(iteration_range[0]),
|
||||
iteration_end=int(iteration_range[1]),
|
||||
missing=missing,
|
||||
strict_shape=strict_shape,
|
||||
cache_id=0,
|
||||
|
||||
@@ -61,7 +61,7 @@ from typing import (
|
||||
import numpy
|
||||
|
||||
from xgboost import collective, config
|
||||
from xgboost._typing import _T, FeatureNames, FeatureTypes
|
||||
from xgboost._typing import _T, FeatureNames, FeatureTypes, IterationRange
|
||||
from xgboost.callback import TrainingCallback
|
||||
from xgboost.compat import DataFrame, LazyLoader, concat, lazy_isinstance
|
||||
from xgboost.core import (
|
||||
@@ -1263,7 +1263,7 @@ async def _predict_async(
|
||||
approx_contribs: bool,
|
||||
pred_interactions: bool,
|
||||
validate_features: bool,
|
||||
iteration_range: Tuple[int, int],
|
||||
iteration_range: IterationRange,
|
||||
strict_shape: bool,
|
||||
) -> _DaskCollection:
|
||||
_booster = await _get_model_future(client, model)
|
||||
@@ -1410,7 +1410,7 @@ def predict( # pylint: disable=unused-argument
|
||||
approx_contribs: bool = False,
|
||||
pred_interactions: bool = False,
|
||||
validate_features: bool = True,
|
||||
iteration_range: Tuple[int, int] = (0, 0),
|
||||
iteration_range: IterationRange = (0, 0),
|
||||
strict_shape: bool = False,
|
||||
) -> Any:
|
||||
"""Run prediction with a trained booster.
|
||||
@@ -1458,7 +1458,7 @@ async def _inplace_predict_async( # pylint: disable=too-many-branches
|
||||
global_config: Dict[str, Any],
|
||||
model: Union[Booster, Dict, "distributed.Future"],
|
||||
data: _DataT,
|
||||
iteration_range: Tuple[int, int],
|
||||
iteration_range: IterationRange,
|
||||
predict_type: str,
|
||||
missing: float,
|
||||
validate_features: bool,
|
||||
@@ -1516,7 +1516,7 @@ def inplace_predict( # pylint: disable=unused-argument
|
||||
client: Optional["distributed.Client"],
|
||||
model: Union[TrainReturnT, Booster, "distributed.Future"],
|
||||
data: _DataT,
|
||||
iteration_range: Tuple[int, int] = (0, 0),
|
||||
iteration_range: IterationRange = (0, 0),
|
||||
predict_type: str = "value",
|
||||
missing: float = numpy.nan,
|
||||
validate_features: bool = True,
|
||||
@@ -1624,7 +1624,7 @@ class DaskScikitLearnBase(XGBModel):
|
||||
output_margin: bool,
|
||||
validate_features: bool,
|
||||
base_margin: Optional[_DaskCollection],
|
||||
iteration_range: Optional[Tuple[int, int]],
|
||||
iteration_range: Optional[IterationRange],
|
||||
) -> Any:
|
||||
iteration_range = self._get_iteration_range(iteration_range)
|
||||
if self._can_use_inplace_predict():
|
||||
@@ -1664,7 +1664,7 @@ class DaskScikitLearnBase(XGBModel):
|
||||
output_margin: bool = False,
|
||||
validate_features: bool = True,
|
||||
base_margin: Optional[_DaskCollection] = None,
|
||||
iteration_range: Optional[Tuple[int, int]] = None,
|
||||
iteration_range: Optional[IterationRange] = None,
|
||||
) -> Any:
|
||||
_assert_dask_support()
|
||||
return self.client.sync(
|
||||
@@ -1679,7 +1679,7 @@ class DaskScikitLearnBase(XGBModel):
|
||||
async def _apply_async(
|
||||
self,
|
||||
X: _DataT,
|
||||
iteration_range: Optional[Tuple[int, int]] = None,
|
||||
iteration_range: Optional[IterationRange] = None,
|
||||
) -> Any:
|
||||
iteration_range = self._get_iteration_range(iteration_range)
|
||||
test_dmatrix = await DaskDMatrix(
|
||||
@@ -1700,7 +1700,7 @@ class DaskScikitLearnBase(XGBModel):
|
||||
def apply(
|
||||
self,
|
||||
X: _DataT,
|
||||
iteration_range: Optional[Tuple[int, int]] = None,
|
||||
iteration_range: Optional[IterationRange] = None,
|
||||
) -> Any:
|
||||
_assert_dask_support()
|
||||
return self.client.sync(self._apply_async, X, iteration_range=iteration_range)
|
||||
@@ -1962,7 +1962,7 @@ class DaskXGBClassifier(DaskScikitLearnBase, XGBClassifierBase):
|
||||
X: _DataT,
|
||||
validate_features: bool,
|
||||
base_margin: Optional[_DaskCollection],
|
||||
iteration_range: Optional[Tuple[int, int]],
|
||||
iteration_range: Optional[IterationRange],
|
||||
) -> _DaskCollection:
|
||||
if self.objective == "multi:softmax":
|
||||
raise ValueError(
|
||||
@@ -1987,7 +1987,7 @@ class DaskXGBClassifier(DaskScikitLearnBase, XGBClassifierBase):
|
||||
X: _DaskCollection,
|
||||
validate_features: bool = True,
|
||||
base_margin: Optional[_DaskCollection] = None,
|
||||
iteration_range: Optional[Tuple[int, int]] = None,
|
||||
iteration_range: Optional[IterationRange] = None,
|
||||
) -> Any:
|
||||
_assert_dask_support()
|
||||
return self._client_sync(
|
||||
@@ -2006,7 +2006,7 @@ class DaskXGBClassifier(DaskScikitLearnBase, XGBClassifierBase):
|
||||
output_margin: bool,
|
||||
validate_features: bool,
|
||||
base_margin: Optional[_DaskCollection],
|
||||
iteration_range: Optional[Tuple[int, int]],
|
||||
iteration_range: Optional[IterationRange],
|
||||
) -> _DaskCollection:
|
||||
pred_probs = await super()._predict_async(
|
||||
data, output_margin, validate_features, base_margin, iteration_range
|
||||
|
||||
@@ -22,7 +22,7 @@ from typing import (
|
||||
import numpy as np
|
||||
from scipy.special import softmax
|
||||
|
||||
from ._typing import ArrayLike, FeatureNames, FeatureTypes, ModelIn
|
||||
from ._typing import ArrayLike, FeatureNames, FeatureTypes, IterationRange, ModelIn
|
||||
from .callback import TrainingCallback
|
||||
|
||||
# Do not use class names on scikit-learn directly. Re-define the classes on
|
||||
@@ -1039,8 +1039,8 @@ class XGBModel(XGBModelBase):
|
||||
return False
|
||||
|
||||
def _get_iteration_range(
|
||||
self, iteration_range: Optional[Tuple[int, int]]
|
||||
) -> Tuple[int, int]:
|
||||
self, iteration_range: Optional[IterationRange]
|
||||
) -> IterationRange:
|
||||
if iteration_range is None or iteration_range[1] == 0:
|
||||
# Use best_iteration if defined.
|
||||
try:
|
||||
@@ -1057,7 +1057,7 @@ class XGBModel(XGBModelBase):
|
||||
output_margin: bool = False,
|
||||
validate_features: bool = True,
|
||||
base_margin: Optional[ArrayLike] = None,
|
||||
iteration_range: Optional[Tuple[int, int]] = None,
|
||||
iteration_range: Optional[IterationRange] = None,
|
||||
) -> ArrayLike:
|
||||
"""Predict with `X`. If the model is trained with early stopping, then
|
||||
:py:attr:`best_iteration` is used automatically. The estimator uses
|
||||
@@ -1129,7 +1129,7 @@ class XGBModel(XGBModelBase):
|
||||
def apply(
|
||||
self,
|
||||
X: ArrayLike,
|
||||
iteration_range: Optional[Tuple[int, int]] = None,
|
||||
iteration_range: Optional[IterationRange] = None,
|
||||
) -> np.ndarray:
|
||||
"""Return the predicted leaf every tree for each sample. If the model is trained
|
||||
with early stopping, then :py:attr:`best_iteration` is used automatically.
|
||||
@@ -1465,7 +1465,7 @@ class XGBClassifier(XGBModel, XGBClassifierBase):
|
||||
output_margin: bool = False,
|
||||
validate_features: bool = True,
|
||||
base_margin: Optional[ArrayLike] = None,
|
||||
iteration_range: Optional[Tuple[int, int]] = None,
|
||||
iteration_range: Optional[IterationRange] = None,
|
||||
) -> ArrayLike:
|
||||
with config_context(verbosity=self.verbosity):
|
||||
class_probs = super().predict(
|
||||
@@ -1500,7 +1500,7 @@ class XGBClassifier(XGBModel, XGBClassifierBase):
|
||||
X: ArrayLike,
|
||||
validate_features: bool = True,
|
||||
base_margin: Optional[ArrayLike] = None,
|
||||
iteration_range: Optional[Tuple[int, int]] = None,
|
||||
iteration_range: Optional[IterationRange] = None,
|
||||
) -> np.ndarray:
|
||||
"""Predict the probability of each `X` example being of a given class. If the
|
||||
model is trained with early stopping, then :py:attr:`best_iteration` is used
|
||||
@@ -1942,7 +1942,7 @@ class XGBRanker(XGBModel, XGBRankerMixIn):
|
||||
output_margin: bool = False,
|
||||
validate_features: bool = True,
|
||||
base_margin: Optional[ArrayLike] = None,
|
||||
iteration_range: Optional[Tuple[int, int]] = None,
|
||||
iteration_range: Optional[IterationRange] = None,
|
||||
) -> ArrayLike:
|
||||
X, _ = _get_qid(X, None)
|
||||
return super().predict(
|
||||
@@ -1956,7 +1956,7 @@ class XGBRanker(XGBModel, XGBRankerMixIn):
|
||||
def apply(
|
||||
self,
|
||||
X: ArrayLike,
|
||||
iteration_range: Optional[Tuple[int, int]] = None,
|
||||
iteration_range: Optional[IterationRange] = None,
|
||||
) -> ArrayLike:
|
||||
X, _ = _get_qid(X, None)
|
||||
return super().apply(X, iteration_range)
|
||||
|
||||
Reference in New Issue
Block a user