From 43878b10b65c9e8caaa61d5945d76dfc613fa0e1 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 27 Mar 2023 12:47:13 +0800
Subject: [PATCH 1/5] Bump maven-deploy-plugin in
 /jvm-packages/xgboost4j-spark-gpu (#8973)

Bumps [maven-deploy-plugin](https://github.com/apache/maven-deploy-plugin) from 3.0.0 to 3.1.1.
- [Release notes](https://github.com/apache/maven-deploy-plugin/releases)
- [Commits](https://github.com/apache/maven-deploy-plugin/compare/maven-deploy-plugin-3.0.0...maven-deploy-plugin-3.1.1)

---
updated-dependencies:
- dependency-name: org.apache.maven.plugins:maven-deploy-plugin
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 jvm-packages/pom.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/jvm-packages/pom.xml b/jvm-packages/pom.xml
index a5d219040..facb955ce 100644
--- a/jvm-packages/pom.xml
+++ b/jvm-packages/pom.xml
@@ -252,7 +252,7 @@
         org.apache.maven.plugins
         maven-deploy-plugin
-        3.0.0
+        3.1.1
         internal.repo::default::file://${project.build.directory}/mvn-repo

From 90645c4957359958314f5f75141797e5640e3dcd Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 27 Mar 2023 16:03:45 +0800
Subject: [PATCH 2/5] Bump maven-resources-plugin from 3.3.0 to 3.3.1 in
 /jvm-packages (#8980)

Bumps [maven-resources-plugin](https://github.com/apache/maven-resources-plugin) from 3.3.0 to 3.3.1.
- [Release notes](https://github.com/apache/maven-resources-plugin/releases)
- [Commits](https://github.com/apache/maven-resources-plugin/compare/maven-resources-plugin-3.3.0...maven-resources-plugin-3.3.1)

---
updated-dependencies:
- dependency-name: org.apache.maven.plugins:maven-resources-plugin
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 jvm-packages/xgboost4j-gpu/pom.xml | 2 +-
 jvm-packages/xgboost4j/pom.xml | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/jvm-packages/xgboost4j-gpu/pom.xml b/jvm-packages/xgboost4j-gpu/pom.xml
index 1da88c3cc..167635209 100644
--- a/jvm-packages/xgboost4j-gpu/pom.xml
+++ b/jvm-packages/xgboost4j-gpu/pom.xml
@@ -121,7 +121,7 @@
         org.apache.maven.plugins
         maven-resources-plugin
-        3.3.0
+        3.3.1
           dll

diff --git a/jvm-packages/xgboost4j/pom.xml b/jvm-packages/xgboost4j/pom.xml
index 946b11108..aa8694751 100644
--- a/jvm-packages/xgboost4j/pom.xml
+++ b/jvm-packages/xgboost4j/pom.xml
@@ -107,7 +107,7 @@
         org.apache.maven.plugins
         maven-resources-plugin
-        3.3.0
+        3.3.1
           dll

From c2b3a13e709c1f726cef1ebfd987a98809aafe48 Mon Sep 17 00:00:00 2001
From: Jiaming Yuan
Date: Mon, 27 Mar 2023 21:34:10 +0800
Subject: [PATCH 3/5] [breaking][skl] Remove parameter serialization. (#8963)

- Remove parameter serialization in the scikit-learn interface. The
  scikit-learn interface `save_model` will save only the model and discard
  all hyper-parameters. This aligns with the native XGBoost interface, which
  distinguishes between hyper-parameters and model parameters. With the
  scikit-learn interface, model parameters are attributes of the estimator:
  for instance, `n_features_in_` and `n_classes_` are always accessible as
  `estimator.n_features_in_` and `estimator.n_classes_`, but not through
  `estimator.get_params`.
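  A minimal sketch of this behaviour (synthetic data; the file name and
  parameter values are illustrative only, not part of the change):

  ```python
  import numpy as np
  import xgboost as xgb

  X = np.random.randn(64, 4)
  y = np.random.randint(0, 2, size=64)

  clf = xgb.XGBClassifier(n_estimators=8, max_depth=3).fit(X, y)
  clf.save_model("clf.json")

  loaded = xgb.XGBClassifier()
  loaded.load_model("clf.json")
  assert loaded.n_classes_ == 2  # model parameter: restored from the file
  assert loaded.get_params()["max_depth"] is None  # hyper-parameter: discarded
  ```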
- Define a `load_model` method for classifier to load its own attributes. - Set n_estimators to None by default. --- python-package/xgboost/_typing.py | 2 + python-package/xgboost/compat.py | 25 ---- python-package/xgboost/core.py | 3 +- python-package/xgboost/dask.py | 9 +- python-package/xgboost/libpath.py | 2 +- python-package/xgboost/sklearn.py | 183 ++++++++++------------- python-package/xgboost/spark/core.py | 2 + tests/python/test_model_compatibility.py | 1 - tests/python/test_with_sklearn.py | 67 ++++++--- 9 files changed, 134 insertions(+), 160 deletions(-) diff --git a/python-package/xgboost/_typing.py b/python-package/xgboost/_typing.py index 0adad9478..774681031 100644 --- a/python-package/xgboost/_typing.py +++ b/python-package/xgboost/_typing.py @@ -43,6 +43,8 @@ FPreProcCallable = Callable # c_bst_ulong corresponds to bst_ulong defined in xgboost/c_api.h c_bst_ulong = ctypes.c_uint64 # pylint: disable=C0103 +ModelIn = Union[str, bytearray, os.PathLike] + CTypeT = TypeVar( "CTypeT", ctypes.c_void_p, diff --git a/python-package/xgboost/compat.py b/python-package/xgboost/compat.py index 3be023abf..a01eeef09 100644 --- a/python-package/xgboost/compat.py +++ b/python-package/xgboost/compat.py @@ -88,31 +88,6 @@ def is_cudf_available() -> bool: return False -class XGBoostLabelEncoder(LabelEncoder): - """Label encoder with JSON serialization methods.""" - - def to_json(self) -> Dict: - """Returns a JSON compatible dictionary""" - meta = {} - for k, v in self.__dict__.items(): - if isinstance(v, np.ndarray): - meta[k] = v.tolist() - else: - meta[k] = v - return meta - - def from_json(self, doc: Dict) -> None: - # pylint: disable=attribute-defined-outside-init - """Load the encoder back from a JSON compatible dict.""" - meta = {} - for k, v in doc.items(): - if k == "classes_": - self.classes_ = np.array(v) - continue - meta[k] = v - self.__dict__.update(meta) - - try: import scipy.sparse as scipy_sparse from scipy.sparse import csr_matrix as scipy_csr diff --git a/python-package/xgboost/core.py b/python-package/xgboost/core.py index 5a0cfb3a2..a0393391e 100644 --- a/python-package/xgboost/core.py +++ b/python-package/xgboost/core.py @@ -47,6 +47,7 @@ from ._typing import ( FeatureInfo, FeatureNames, FeatureTypes, + ModelIn, NumpyOrCupy, c_bst_ulong, ) @@ -2477,7 +2478,7 @@ class Booster: ) return ctypes2buffer(cptr, length.value) - def load_model(self, fname: Union[str, bytearray, os.PathLike]) -> None: + def load_model(self, fname: ModelIn) -> None: """Load the model from a file or bytearray. Path to file can be local or as an URI. diff --git a/python-package/xgboost/dask.py b/python-package/xgboost/dask.py index 0e5e0d28e..a17fbad70 100644 --- a/python-package/xgboost/dask.py +++ b/python-package/xgboost/dask.py @@ -60,7 +60,7 @@ from typing import ( import numpy from . 
import collective, config -from ._typing import _T, FeatureNames, FeatureTypes +from ._typing import _T, FeatureNames, FeatureTypes, ModelIn from .callback import TrainingCallback from .compat import DataFrame, LazyLoader, concat, lazy_isinstance from .core import ( @@ -76,6 +76,7 @@ from .core import ( from .sklearn import ( XGBClassifier, XGBClassifierBase, + XGBClassifierMixIn, XGBModel, XGBRanker, XGBRankerMixIn, @@ -1839,7 +1840,7 @@ class DaskXGBRegressor(DaskScikitLearnBase, XGBRegressorBase): "Implementation of the scikit-learn API for XGBoost classification.", ["estimators", "model"], ) -class DaskXGBClassifier(DaskScikitLearnBase, XGBClassifierBase): +class DaskXGBClassifier(DaskScikitLearnBase, XGBClassifierMixIn, XGBClassifierBase): # pylint: disable=missing-class-docstring async def _fit_async( self, @@ -2019,6 +2020,10 @@ class DaskXGBClassifier(DaskScikitLearnBase, XGBClassifierBase): preds = da.map_blocks(_argmax, pred_probs, drop_axis=1) return preds + def load_model(self, fname: ModelIn) -> None: + super().load_model(fname) + self._load_model_attributes(self.get_booster()) + @xgboost_model_doc( """Implementation of the Scikit-Learn API for XGBoost Ranking. diff --git a/python-package/xgboost/libpath.py b/python-package/xgboost/libpath.py index 9223acaa5..be37b364e 100644 --- a/python-package/xgboost/libpath.py +++ b/python-package/xgboost/libpath.py @@ -55,7 +55,7 @@ def find_lib_path() -> List[str]: # XGBOOST_BUILD_DOC is defined by sphinx conf. if not lib_path and not os.environ.get("XGBOOST_BUILD_DOC", False): - link = "https://xgboost.readthedocs.io/en/latest/build.html" + link = "https://xgboost.readthedocs.io/en/stable/install.html" msg = ( "Cannot find XGBoost Library in the candidate path. " + "List of candidates:\n- " diff --git a/python-package/xgboost/sklearn.py b/python-package/xgboost/sklearn.py index 52175981a..563ff8659 100644 --- a/python-package/xgboost/sklearn.py +++ b/python-package/xgboost/sklearn.py @@ -22,23 +22,18 @@ from typing import ( import numpy as np from scipy.special import softmax -from ._typing import ArrayLike, FeatureNames, FeatureTypes +from ._typing import ArrayLike, FeatureNames, FeatureTypes, ModelIn from .callback import TrainingCallback # Do not use class names on scikit-learn directly. Re-define the classes on # .compat to guarantee the behavior without scikit-learn -from .compat import ( - SKLEARN_INSTALLED, - XGBClassifierBase, - XGBModelBase, - XGBoostLabelEncoder, - XGBRegressorBase, -) +from .compat import SKLEARN_INSTALLED, XGBClassifierBase, XGBModelBase, XGBRegressorBase from .config import config_context from .core import ( Booster, DMatrix, Metric, + Objective, QuantileDMatrix, XGBoostError, _convert_ntree_limit, @@ -49,9 +44,24 @@ from .data import _is_cudf_df, _is_cudf_ser, _is_cupy_array, _is_pandas_df from .training import train +class XGBClassifierMixIn: # pylint: disable=too-few-public-methods + """MixIn for classification.""" + + def __init__(self, *args: Any, **kwargs: Any) -> None: + super().__init__(*args, **kwargs) + + def _load_model_attributes(self, booster: Booster) -> None: + config = json.loads(booster.save_config()) + self.n_classes_ = int(config["learner"]["learner_model_param"]["num_class"]) + # binary classification is treated as regression in XGBoost. 
+ self.n_classes_ = 2 if self.n_classes_ < 2 else self.n_classes_ + + class XGBRankerMixIn: # pylint: disable=too-few-public-methods - """MixIn for ranking, defines the _estimator_type usually defined in scikit-learn base - classes.""" + """MixIn for ranking, defines the _estimator_type usually defined in scikit-learn + base classes. + + """ _estimator_type = "ranker" @@ -74,7 +84,7 @@ SklObjective = Optional[ def _objective_decorator( func: Callable[[np.ndarray, np.ndarray], Tuple[np.ndarray, np.ndarray]] -) -> Callable[[np.ndarray, DMatrix], Tuple[np.ndarray, np.ndarray]]: +) -> Objective: """Decorate an objective function Converts an objective function using the typical sklearn metrics @@ -173,7 +183,7 @@ def ltr_metric_decorator(func: Callable, n_jobs: Optional[int]) -> Metric: __estimator_doc = """ - n_estimators : int + n_estimators : Optional[int] Number of gradient boosted trees. Equivalent to number of boosting rounds. """ @@ -598,6 +608,9 @@ def _wrap_evaluation_matrices( return train_dmatrix, evals +DEFAULT_N_ESTIMATORS = 100 + + @xgboost_model_doc( """Implementation of the Scikit-Learn API for XGBoost.""", ["estimators", "model", "objective"], @@ -611,7 +624,7 @@ class XGBModel(XGBModelBase): max_bin: Optional[int] = None, grow_policy: Optional[str] = None, learning_rate: Optional[float] = None, - n_estimators: int = 100, + n_estimators: Optional[int] = None, verbosity: Optional[int] = None, objective: SklObjective = None, booster: Optional[str] = None, @@ -797,7 +810,7 @@ class XGBModel(XGBModelBase): def get_num_boosting_rounds(self) -> int: """Gets the number of xgboost boosting rounds.""" - return self.n_estimators + return DEFAULT_N_ESTIMATORS if self.n_estimators is None else self.n_estimators def _get_type(self) -> str: if not hasattr(self, "_estimator_type"): @@ -809,72 +822,33 @@ class XGBModel(XGBModelBase): def save_model(self, fname: Union[str, os.PathLike]) -> None: meta: Dict[str, Any] = {} - for k, v in self.__dict__.items(): - if k == "_le": - meta["_le"] = self._le.to_json() - continue - if k == "_Booster": - continue - if k == "classes_": - # numpy array is not JSON serializable - meta["classes_"] = self.classes_.tolist() - continue - if k == "feature_types": - # Use the `feature_types` attribute from booster instead. - meta["feature_types"] = None - continue - try: - json.dumps({k: v}) - meta[k] = v - except TypeError: - warnings.warn( - str(k) + " is not saved in Scikit-Learn meta.", UserWarning - ) + # For validation. meta["_estimator_type"] = self._get_type() meta_str = json.dumps(meta) self.get_booster().set_attr(scikit_learn=meta_str) self.get_booster().save_model(fname) - # Delete the attribute after save self.get_booster().set_attr(scikit_learn=None) save_model.__doc__ = f"""{Booster.save_model.__doc__}""" - def load_model(self, fname: Union[str, bytearray, os.PathLike]) -> None: + def load_model(self, fname: ModelIn) -> None: # pylint: disable=attribute-defined-outside-init - if not hasattr(self, "_Booster"): + if not self.__sklearn_is_fitted__(): self._Booster = Booster({"n_jobs": self.n_jobs}) self.get_booster().load_model(fname) + meta_str = self.get_booster().attr("scikit_learn") if meta_str is None: - # FIXME(jiaming): This doesn't have to be a problem as most of the needed - # information like num_class and objective is in Learner class. 
- warnings.warn("Loading a native XGBoost model with Scikit-Learn interface.") return + meta = json.loads(meta_str) - states = {} - for k, v in meta.items(): - if k == "_le": - self._le = XGBoostLabelEncoder() - self._le.from_json(v) - continue - # FIXME(jiaming): This can be removed once label encoder is gone since we can - # generate it from `np.arange(self.n_classes_)` - if k == "classes_": - self.classes_ = np.array(v) - continue - if k == "feature_types": - self.feature_types = self.get_booster().feature_types - continue - if k == "_estimator_type": - if self._get_type() != v: - raise TypeError( - "Loading an estimator with different type. " - f"Expecting: {self._get_type()}, got: {v}" - ) - continue - states[k] = v - self.__dict__.update(states) - # Delete the attribute after load + t = meta.get("_estimator_type", None) + if t is not None and t != self._get_type(): + raise TypeError( + "Loading an estimator with different type. Expecting: " + f"{self._get_type()}, got: {t}" + ) + self.feature_types = self.get_booster().feature_types self.get_booster().set_attr(scikit_learn=None) load_model.__doc__ = f"""{Booster.load_model.__doc__}""" @@ -965,7 +939,6 @@ class XGBModel(XGBModelBase): "Experimental support for categorical data is not implemented for" " current tree method yet." ) - return model, metric, params, early_stopping_rounds, callbacks def _create_dmatrix(self, ref: Optional[DMatrix], **kwargs: Any) -> DMatrix: @@ -1086,9 +1059,7 @@ class XGBModel(XGBModelBase): params = self.get_xgb_params() if callable(self.objective): - obj: Optional[ - Callable[[np.ndarray, DMatrix], Tuple[np.ndarray, np.ndarray]] - ] = _objective_decorator(self.objective) + obj: Optional[Objective] = _objective_decorator(self.objective) params["objective"] = "reg:squarederror" else: obj = None @@ -1304,8 +1275,10 @@ class XGBModel(XGBModelBase): @property def feature_names_in_(self) -> np.ndarray: - """Names of features seen during :py:meth:`fit`. Defined only when `X` has feature - names that are all strings.""" + """Names of features seen during :py:meth:`fit`. Defined only when `X` has + feature names that are all strings. + + """ feature_names = self.get_booster().feature_names if feature_names is None: raise AttributeError( @@ -1453,26 +1426,19 @@ def _cls_predict_proba(n_classes: int, prediction: PredtT, vstack: Callable) -> "Implementation of the scikit-learn API for XGBoost classification.", ["model", "objective"], extra_parameters=""" - n_estimators : int + n_estimators : Optional[int] Number of boosting rounds. 
""", ) -class XGBClassifier(XGBModel, XGBClassifierBase): +class XGBClassifier(XGBModel, XGBClassifierMixIn, XGBClassifierBase): # pylint: disable=missing-docstring,invalid-name,too-many-instance-attributes @_deprecate_positional_args def __init__( self, *, objective: SklObjective = "binary:logistic", - use_label_encoder: Optional[bool] = None, **kwargs: Any, ) -> None: - # must match the parameters for `get_params` - self.use_label_encoder = use_label_encoder - if use_label_encoder is True: - raise ValueError("Label encoder was removed in 1.6.0.") - if use_label_encoder is not None: - warnings.warn("`use_label_encoder` is deprecated in 1.7.0.") super().__init__(objective=objective, **kwargs) @_deprecate_positional_args @@ -1496,38 +1462,38 @@ class XGBClassifier(XGBModel, XGBClassifierBase): # pylint: disable = attribute-defined-outside-init,too-many-statements with config_context(verbosity=self.verbosity): evals_result: TrainingCallback.EvalsLog = {} - + # We keep the n_classes_ as a simple member instead of loading it from + # booster in a Python property. This way we can have efficient and + # thread-safe prediction. if _is_cudf_df(y) or _is_cudf_ser(y): import cupy as cp # pylint: disable=E0401 - self.classes_ = cp.unique(y.values) - self.n_classes_ = len(self.classes_) - expected_classes = cp.arange(self.n_classes_) + classes = cp.unique(y.values) + self.n_classes_ = len(classes) + expected_classes = cp.array(self.classes_) elif _is_cupy_array(y): import cupy as cp # pylint: disable=E0401 - self.classes_ = cp.unique(y) - self.n_classes_ = len(self.classes_) - expected_classes = cp.arange(self.n_classes_) + classes = cp.unique(y) + self.n_classes_ = len(classes) + expected_classes = cp.array(self.classes_) else: - self.classes_ = np.unique(np.asarray(y)) - self.n_classes_ = len(self.classes_) - expected_classes = np.arange(self.n_classes_) + classes = np.unique(np.asarray(y)) + self.n_classes_ = len(classes) + expected_classes = self.classes_ if ( - self.classes_.shape != expected_classes.shape - or not (self.classes_ == expected_classes).all() + classes.shape != expected_classes.shape + or not (classes == expected_classes).all() ): raise ValueError( f"Invalid classes inferred from unique values of `y`. " - f"Expected: {expected_classes}, got {self.classes_}" + f"Expected: {expected_classes}, got {classes}" ) params = self.get_xgb_params() if callable(self.objective): - obj: Optional[ - Callable[[np.ndarray, DMatrix], Tuple[np.ndarray, np.ndarray]] - ] = _objective_decorator(self.objective) + obj: Optional[Objective] = _objective_decorator(self.objective) # Use default value. Is it really not used ? 
params["objective"] = "binary:logistic" else: @@ -1616,7 +1582,7 @@ class XGBClassifier(XGBModel, XGBClassifierBase): if len(class_probs.shape) > 1 and self.n_classes_ != 2: # multi-class, turns softprob into softmax - column_indexes: np.ndarray = np.argmax(class_probs, axis=1) # type: ignore + column_indexes: np.ndarray = np.argmax(class_probs, axis=1) elif len(class_probs.shape) > 1 and class_probs.shape[1] != 1: # multi-label column_indexes = np.zeros(class_probs.shape) @@ -1628,8 +1594,6 @@ class XGBClassifier(XGBModel, XGBClassifierBase): column_indexes = np.repeat(0, class_probs.shape[0]) column_indexes[class_probs > 0.5] = 1 - if hasattr(self, "_le"): - return self._le.inverse_transform(column_indexes) return column_indexes def predict_proba( @@ -1693,17 +1657,22 @@ class XGBClassifier(XGBModel, XGBClassifierBase): base_margin=base_margin, iteration_range=iteration_range, ) - # If model is loaded from a raw booster there's no `n_classes_` - return _cls_predict_proba( - getattr(self, "n_classes_", 0), class_probs, np.vstack - ) + return _cls_predict_proba(self.n_classes_, class_probs, np.vstack) + + @property + def classes_(self) -> np.ndarray: + return np.arange(self.n_classes_) + + def load_model(self, fname: ModelIn) -> None: + super().load_model(fname) + self._load_model_attributes(self.get_booster()) @xgboost_model_doc( "scikit-learn API for XGBoost random forest classification.", ["model", "objective"], extra_parameters=""" - n_estimators : int + n_estimators : Optional[int] Number of trees in random forest to fit. """, ) @@ -1730,7 +1699,7 @@ class XGBRFClassifier(XGBClassifier): def get_xgb_params(self) -> Dict[str, Any]: params = super().get_xgb_params() - params["num_parallel_tree"] = self.n_estimators + params["num_parallel_tree"] = super().get_num_boosting_rounds() return params def get_num_boosting_rounds(self) -> int: @@ -1778,7 +1747,7 @@ class XGBRegressor(XGBModel, XGBRegressorBase): "scikit-learn API for XGBoost random forest regression.", ["model", "objective"], extra_parameters=""" - n_estimators : int + n_estimators : Optional[int] Number of trees in random forest to fit. 
""", ) @@ -1805,7 +1774,7 @@ class XGBRFRegressor(XGBRegressor): def get_xgb_params(self) -> Dict[str, Any]: params = super().get_xgb_params() - params["num_parallel_tree"] = self.n_estimators + params["num_parallel_tree"] = super().get_num_boosting_rounds() return params def get_num_boosting_rounds(self) -> int: diff --git a/python-package/xgboost/spark/core.py b/python-package/xgboost/spark/core.py index 745c9348f..1a614f51f 100644 --- a/python-package/xgboost/spark/core.py +++ b/python-package/xgboost/spark/core.py @@ -39,6 +39,7 @@ import xgboost from xgboost import XGBClassifier, XGBRanker, XGBRegressor from xgboost.compat import is_cudf_available from xgboost.core import Booster +from xgboost.sklearn import DEFAULT_N_ESTIMATORS from xgboost.training import train as worker_train from .data import ( @@ -215,6 +216,7 @@ class _SparkXGBParams( filtered_params_dict = { k: params_dict[k] for k in params_dict if k not in _unsupported_xgb_params } + filtered_params_dict["n_estimators"] = DEFAULT_N_ESTIMATORS return filtered_params_dict def _set_xgb_params_default(self): diff --git a/tests/python/test_model_compatibility.py b/tests/python/test_model_compatibility.py index a46715e42..c9b7646ef 100644 --- a/tests/python/test_model_compatibility.py +++ b/tests/python/test_model_compatibility.py @@ -66,7 +66,6 @@ def run_scikit_model_check(name, path): cls.load_model(path) if name.find('0.90') == -1: assert len(cls.classes_) == gm.kClasses - assert len(cls._le.classes_) == gm.kClasses assert cls.n_classes_ == gm.kClasses assert (len(cls.get_booster().get_dump()) == gm.kRounds * gm.kForests * gm.kClasses), path diff --git a/tests/python/test_with_sklearn.py b/tests/python/test_with_sklearn.py index c34b7d2d1..90d4dff18 100644 --- a/tests/python/test_with_sklearn.py +++ b/tests/python/test_with_sklearn.py @@ -38,36 +38,34 @@ def test_binary_classification(): assert err < 0.1 -@pytest.mark.parametrize('objective', ['multi:softmax', 'multi:softprob']) +@pytest.mark.parametrize("objective", ["multi:softmax", "multi:softprob"]) def test_multiclass_classification(objective): from sklearn.datasets import load_iris from sklearn.model_selection import KFold def check_pred(preds, labels, output_margin): if output_margin: - err = sum(1 for i in range(len(preds)) - if preds[i].argmax() != labels[i]) / float(len(preds)) + err = sum( + 1 for i in range(len(preds)) if preds[i].argmax() != labels[i] + ) / float(len(preds)) else: - err = sum(1 for i in range(len(preds)) - if preds[i] != labels[i]) / float(len(preds)) + err = sum(1 for i in range(len(preds)) if preds[i] != labels[i]) / float( + len(preds) + ) assert err < 0.4 - iris = load_iris() - y = iris['target'] - X = iris['data'] + X, y = load_iris(return_X_y=True) kf = KFold(n_splits=2, shuffle=True, random_state=rng) for train_index, test_index in kf.split(X, y): - xgb_model = xgb.XGBClassifier(objective=objective).fit(X[train_index], y[train_index]) - assert (xgb_model.get_booster().num_boosted_rounds() == - xgb_model.n_estimators) + xgb_model = xgb.XGBClassifier(objective=objective).fit( + X[train_index], y[train_index] + ) + assert xgb_model.get_booster().num_boosted_rounds() == 100 preds = xgb_model.predict(X[test_index]) # test other params in XGBClassifier().fit - preds2 = xgb_model.predict(X[test_index], output_margin=True, - ntree_limit=3) - preds3 = xgb_model.predict(X[test_index], output_margin=True, - ntree_limit=0) - preds4 = xgb_model.predict(X[test_index], output_margin=False, - ntree_limit=3) + preds2 = xgb_model.predict(X[test_index], 
output_margin=True, ntree_limit=3) + preds3 = xgb_model.predict(X[test_index], output_margin=True, ntree_limit=0) + preds4 = xgb_model.predict(X[test_index], output_margin=False, ntree_limit=3) labels = y[test_index] check_pred(preds, labels, output_margin=False) @@ -761,9 +759,9 @@ def test_parameters_access(): clf = save_load(clf) assert clf.tree_method is None - assert clf.n_estimators == 2 + assert clf.n_estimators is None assert clf.get_params()["tree_method"] is None - assert clf.get_params()["n_estimators"] == 2 + assert clf.get_params()["n_estimators"] is None assert get_tm(clf) == "auto" # discarded for save/load_model clf.set_params(tree_method="hist") @@ -771,9 +769,7 @@ def test_parameters_access(): clf = pickle.loads(pickle.dumps(clf)) assert clf.get_params()["tree_method"] == "hist" clf = save_load(clf) - # FIXME(jiamingy): We should remove this behavior once we remove parameters - # serialization for skl save/load_model. - assert clf.get_params()["tree_method"] == "hist" + assert clf.get_params()["tree_method"] is None def test_kwargs_error(): @@ -902,6 +898,7 @@ def save_load_model(model_path): xgb_model.load_model(model_path) assert isinstance(xgb_model.classes_, np.ndarray) + np.testing.assert_equal(xgb_model.classes_, np.array([0, 1])) assert isinstance(xgb_model._Booster, xgb.Booster) preds = xgb_model.predict(X[test_index]) @@ -933,8 +930,10 @@ def test_save_load_model(): save_load_model(model_path) from sklearn.datasets import load_digits + from sklearn.model_selection import train_test_split + with tempfile.TemporaryDirectory() as tempdir: - model_path = os.path.join(tempdir, 'digits.model.json') + model_path = os.path.join(tempdir, 'digits.model.ubj') digits = load_digits(n_class=2) y = digits['target'] X = digits['data'] @@ -959,6 +958,28 @@ def test_save_load_model(): predt_1 = cls.predict(X) assert np.allclose(predt_0, predt_1) + # mclass + X, y = load_digits(n_class=10, return_X_y=True) + # small test_size to force early stop + X_train, X_test, y_train, y_test = train_test_split( + X, y, test_size=0.01, random_state=1 + ) + clf = xgb.XGBClassifier( + n_estimators=64, tree_method="hist", early_stopping_rounds=2 + ) + clf.fit(X_train, y_train, eval_set=[(X_test, y_test)]) + score = clf.best_score + clf.save_model(model_path) + + clf = xgb.XGBClassifier() + clf.load_model(model_path) + assert clf.classes_.size == 10 + np.testing.assert_equal(clf.classes_, np.arange(10)) + assert clf.n_classes_ == 10 + + assert clf.best_iteration == 27 + assert clf.best_score == score + def test_RFECV(): from sklearn.datasets import load_breast_cancer, load_diabetes, load_iris From acc110c25189f4b1f16b1bc0d813db47f43dee5c Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Mon, 27 Mar 2023 23:10:54 +0800 Subject: [PATCH 4/5] [MT-TREE] Support prediction cache and model slicing. (#8968) - Fix prediction range. - Support prediction cache in mt-hist. - Support model slicing. - Make the booster a Python iterable by defining `__iter__`. - Cleanup removed/deprecated parameters. - A new field in the output model `iteration_indptr` for pointing to the ranges of trees for each iteration. 
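A rough sketch of the `iteration_indptr` layout described above (the shape
numbers are made up for illustration): with scalar leaves, each boosting round
appends `num_parallel_tree * n_groups` trees, so round `i` owns the half-open
tree range `[iteration_indptr[i], iteration_indptr[i + 1])`.

```python
# Hypothetical shape parameters, chosen only to illustrate the layout.
num_parallel_tree, n_groups, n_rounds = 2, 3, 4

iteration_indptr = [i * num_parallel_tree * n_groups for i in range(n_rounds + 1)]
assert iteration_indptr == [0, 6, 12, 18, 24]

# Trees fit during round 1 occupy the range [6, 12).
begin, end = iteration_indptr[1], iteration_indptr[2]
```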
--- demo/json-model/json_parser.py | 3 - doc/model.schema | 23 +-- include/xgboost/base.h | 12 +- include/xgboost/gbm.h | 49 +++--- include/xgboost/learner.h | 8 +- include/xgboost/tree_updater.h | 4 +- python-package/xgboost/core.py | 5 + src/gbm/gblinear.cc | 2 +- src/gbm/gbtree.cc | 176 +++++++++++----------- src/gbm/gbtree.h | 109 ++++++-------- src/gbm/gbtree_model.cc | 131 +++++++++++++--- src/gbm/gbtree_model.h | 86 ++++++----- src/learner.cc | 8 +- src/predictor/cpu_predictor.cc | 5 +- src/predictor/gpu_predictor.cu | 1 - src/tree/hist/evaluate_splits.h | 42 +++++- src/tree/updater_approx.cc | 4 +- src/tree/updater_gpu_hist.cu | 7 +- src/tree/updater_quantile_hist.cc | 22 ++- tests/cpp/common/test_json.cc | 7 +- tests/cpp/gbm/test_gbtree.cc | 2 +- tests/cpp/helpers.cc | 17 --- tests/cpp/helpers.h | 8 +- tests/cpp/predictor/test_gpu_predictor.cu | 8 +- tests/cpp/predictor/test_predictor.cc | 4 +- tests/cpp/predictor/test_predictor.h | 17 +++ tests/cpp/test_serialization.cc | 5 +- tests/cpp/tree/test_gpu_hist.cu | 2 +- tests/cpp/tree/test_prediction_cache.cc | 36 +++-- tests/python/test_basic_models.py | 42 +++++- 30 files changed, 502 insertions(+), 343 deletions(-) diff --git a/demo/json-model/json_parser.py b/demo/json-model/json_parser.py index 315ede61b..b744d9569 100644 --- a/demo/json-model/json_parser.py +++ b/demo/json-model/json_parser.py @@ -162,9 +162,6 @@ class Model: # Load the trees self.num_trees = int(model_shape["num_trees"]) - self.leaf_size = int(model_shape["size_leaf_vector"]) - # Right now XGBoost doesn't support vector leaf yet - assert self.leaf_size == 0, str(self.leaf_size) trees: List[Tree] = [] for i in range(self.num_trees): diff --git a/doc/model.schema b/doc/model.schema index 07a871820..b9e2da305 100644 --- a/doc/model.schema +++ b/doc/model.schema @@ -19,23 +19,7 @@ "type": "object", "properties": { "tree_param": { - "type": "object", - "properties": { - "num_nodes": { - "type": "string" - }, - "size_leaf_vector": { - "type": "string" - }, - "num_feature": { - "type": "string" - } - }, - "required": [ - "num_nodes", - "num_feature", - "size_leaf_vector" - ] + "$ref": "#/definitions/tree_param" }, "id": { "type": "integer" @@ -170,14 +154,11 @@ }, "num_parallel_tree": { "type": "string" - }, - "size_leaf_vector": { - "type": "string" } }, "required": [ "num_trees", - "size_leaf_vector" + "num_parallel_tree" ] }, "tree_param": { diff --git a/include/xgboost/base.h b/include/xgboost/base.h index 00fc7fb4a..43540beea 100644 --- a/include/xgboost/base.h +++ b/include/xgboost/base.h @@ -113,8 +113,18 @@ using bst_row_t = std::size_t; // NOLINT using bst_node_t = std::int32_t; // NOLINT /*! \brief Type for ranking group index. */ using bst_group_t = std::uint32_t; // NOLINT -/*! \brief Type for indexing into output targets. */ +/** + * \brief Type for indexing into output targets. + */ using bst_target_t = std::uint32_t; // NOLINT +/** + * brief Type for indexing boosted layers. + */ +using bst_layer_t = std::int32_t; // NOLINT +/** + * \brief Type for indexing trees. + */ +using bst_tree_t = std::int32_t; // NOLINT namespace detail { /*! \brief Implementation of gradient statistics pair. Template specialisation diff --git a/include/xgboost/gbm.h b/include/xgboost/gbm.h index 07758a524..4f690064f 100644 --- a/include/xgboost/gbm.h +++ b/include/xgboost/gbm.h @@ -59,16 +59,16 @@ class GradientBooster : public Model, public Configurable { * \param fo output stream */ virtual void Save(dmlc::Stream* fo) const = 0; - /*! 
+ /** * \brief Slice a model using boosting index. The slice m:n indicates taking all trees * that were fit during the boosting rounds m, (m+1), (m+2), ..., (n-1). - * \param layer_begin Beginning of boosted tree layer used for prediction. - * \param layer_end End of booster layer. 0 means do not limit trees. - * \param out Output gradient booster + * \param begin Beginning of boosted tree layer used for prediction. + * \param end End of booster layer. 0 means do not limit trees. + * \param out Output gradient booster */ - virtual void Slice(int32_t /*layer_begin*/, int32_t /*layer_end*/, int32_t /*step*/, + virtual void Slice(bst_layer_t /*begin*/, bst_layer_t /*end*/, bst_layer_t /*step*/, GradientBooster* /*out*/, bool* /*out_of_bound*/) const { - LOG(FATAL) << "Slice is not supported by current booster."; + LOG(FATAL) << "Slice is not supported by the current booster."; } /*! \brief Return number of boosted rounds. */ @@ -88,34 +88,31 @@ class GradientBooster : public Model, public Configurable { virtual void DoBoost(DMatrix* p_fmat, HostDeviceVector* in_gpair, PredictionCacheEntry*, ObjFunction const* obj) = 0; - /*! - * \brief generate predictions for given feature matrix - * \param dmat feature matrix + /** + * \brief Generate predictions for given feature matrix + * + * \param dmat The feature matrix. * \param out_preds output vector to hold the predictions * \param training Whether the prediction value is used for training. For dart booster * drop out is performed during training. - * \param layer_begin Beginning of boosted tree layer used for prediction. - * \param layer_end End of booster layer. 0 means do not limit trees. + * \param begin Beginning of boosted tree layer used for prediction. + * \param end End of booster layer. 0 means do not limit trees. */ - virtual void PredictBatch(DMatrix* dmat, - PredictionCacheEntry* out_preds, - bool training, - unsigned layer_begin, - unsigned layer_end) = 0; + virtual void PredictBatch(DMatrix* dmat, PredictionCacheEntry* out_preds, bool training, + bst_layer_t begin, bst_layer_t end) = 0; - /*! + /** * \brief Inplace prediction. * - * \param p_fmat A proxy DMatrix that contains the data and related - * meta info. - * \param missing Missing value in the data. - * \param [in,out] out_preds The output preds. - * \param layer_begin (Optional) Beginning of boosted tree layer used for prediction. - * \param layer_end (Optional) End of booster layer. 0 means do not limit trees. + * \param p_fmat A proxy DMatrix that contains the data and related. + * \param missing Missing value in the data. + * \param [in,out] out_preds The output preds. + * \param begin (Optional) Beginning of boosted tree layer used for prediction. + * \param end (Optional) End of booster layer. 0 means do not limit trees. */ - virtual void InplacePredict(std::shared_ptr, float, PredictionCacheEntry*, uint32_t, - uint32_t) const { - LOG(FATAL) << "Inplace predict is not supported by current booster."; + virtual void InplacePredict(std::shared_ptr, float, PredictionCacheEntry*, bst_layer_t, + bst_layer_t) const { + LOG(FATAL) << "Inplace predict is not supported by the current booster."; } /*! 
* \brief online prediction function, predict score for one instance at a time diff --git a/include/xgboost/learner.h b/include/xgboost/learner.h index 08e1ded09..f2b377ac1 100644 --- a/include/xgboost/learner.h +++ b/include/xgboost/learner.h @@ -9,7 +9,7 @@ #define XGBOOST_LEARNER_H_ #include // for Serializable -#include // for bst_feature_t, bst_target_t, bst_float, Args, GradientPair +#include // for bst_feature_t, bst_target_t, bst_float, Args, GradientPair, .. #include // for Context #include // for Tensor, TensorView #include // for Metric @@ -229,7 +229,7 @@ class Learner : public Model, public Configurable, public dmlc::Serializable { */ virtual void GetFeatureTypes(std::vector* ft) const = 0; - /*! + /** * \brief Slice the model. * * See InplacePredict for layer parameters. @@ -239,8 +239,8 @@ class Learner : public Model, public Configurable, public dmlc::Serializable { * * \return a sliced model. */ - virtual Learner *Slice(int32_t begin_layer, int32_t end_layer, int32_t step, - bool *out_of_bound) = 0; + virtual Learner* Slice(bst_layer_t begin, bst_layer_t end, bst_layer_t step, + bool* out_of_bound) = 0; /*! * \brief dump the model in the requested format * \param fmap feature map that may help give interpretations of feature diff --git a/include/xgboost/tree_updater.h b/include/xgboost/tree_updater.h index 02248ed8c..79b80319f 100644 --- a/include/xgboost/tree_updater.h +++ b/include/xgboost/tree_updater.h @@ -85,8 +85,8 @@ class TreeUpdater : public Configurable { * the prediction cache. If true, the prediction cache will have been * updated by the time this function returns. */ - virtual bool UpdatePredictionCache(const DMatrix * /*data*/, - linalg::VectorView /*out_preds*/) { + virtual bool UpdatePredictionCache(const DMatrix* /*data*/, + linalg::MatrixView /*out_preds*/) { return false; } diff --git a/python-package/xgboost/core.py b/python-package/xgboost/core.py index a0393391e..30aa771e3 100644 --- a/python-package/xgboost/core.py +++ b/python-package/xgboost/core.py @@ -17,6 +17,7 @@ from typing import ( Any, Callable, Dict, + Generator, Iterable, List, Optional, @@ -1756,6 +1757,10 @@ class Booster: sliced.handle = sliced_handle return sliced + def __iter__(self) -> Generator["Booster", None, None]: + for i in range(0, self.num_boosted_rounds()): + yield self[i] + def save_config(self) -> str: """Output internal parameter configuration of Booster as a JSON string. 
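The `__iter__` added to `Booster` above makes per-iteration slices directly
iterable; a small usage sketch (the data and round count are arbitrary):

```python
import numpy as np
import xgboost as xgb

X, y = np.random.randn(128, 4), np.random.randn(128)
bst = xgb.train({"objective": "reg:squarederror"}, xgb.DMatrix(X, label=y),
                num_boost_round=5)

# Each element yielded by __iter__ is a one-round slice, i.e. bst[i].
rounds = [layer.num_boosted_rounds() for layer in bst]
assert rounds == [1, 1, 1, 1, 1]
```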
diff --git a/src/gbm/gblinear.cc b/src/gbm/gblinear.cc index 575820758..f1189886c 100644 --- a/src/gbm/gblinear.cc +++ b/src/gbm/gblinear.cc @@ -148,7 +148,7 @@ class GBLinear : public GradientBooster { } void PredictBatch(DMatrix* p_fmat, PredictionCacheEntry* predts, bool /*training*/, - uint32_t layer_begin, uint32_t) override { + bst_layer_t layer_begin, bst_layer_t) override { monitor_.Start("PredictBatch"); LinearCheckLayer(layer_begin); auto* out_preds = &predts->predictions; diff --git a/src/gbm/gbtree.cc b/src/gbm/gbtree.cc index a912d6a75..f67c05344 100644 --- a/src/gbm/gbtree.cc +++ b/src/gbm/gbtree.cc @@ -225,10 +225,9 @@ void CopyGradient(HostDeviceVector const* in_gpair, int32_t n_thre } void GBTree::UpdateTreeLeaf(DMatrix const* p_fmat, HostDeviceVector const& predictions, - ObjFunction const* obj, - std::int32_t group_idx, + ObjFunction const* obj, std::int32_t group_idx, std::vector> const& node_position, - std::vector>* p_trees) { + TreesOneGroup* p_trees) { CHECK(!updaters_.empty()); if (!updaters_.back()->HasNodePosition()) { return; @@ -252,8 +251,8 @@ void GBTree::UpdateTreeLeaf(DMatrix const* p_fmat, HostDeviceVector const void GBTree::DoBoost(DMatrix* p_fmat, HostDeviceVector* in_gpair, PredictionCacheEntry* predt, ObjFunction const* obj) { - std::vector>> new_trees; - const int ngroup = model_.learner_model_param->OutputLength(); + TreesOneIter new_trees; + bst_target_t const n_groups = model_.learner_model_param->OutputLength(); ConfigureWithKnownData(this->cfg_, p_fmat); monitor_.Start("BoostNewTrees"); @@ -265,7 +264,7 @@ void GBTree::DoBoost(DMatrix* p_fmat, HostDeviceVector* in_gpair, device, device == Context::kCpuId ? predt->predictions.HostSpan() : predt->predictions.DeviceSpan(), p_fmat->Info().num_row_, model_.learner_model_param->OutputLength()); - CHECK_NE(ngroup, 0); + CHECK_NE(n_groups, 0); if (!p_fmat->SingleColBlock() && obj->Task().UpdateTreeLeaf()) { LOG(FATAL) << "Current objective doesn't support external memory."; @@ -276,36 +275,39 @@ void GBTree::DoBoost(DMatrix* p_fmat, HostDeviceVector* in_gpair, std::vector> node_position; if (model_.learner_model_param->IsVectorLeaf()) { - std::vector> ret; + TreesOneGroup ret; BoostNewTrees(in_gpair, p_fmat, 0, &node_position, &ret); UpdateTreeLeaf(p_fmat, predt->predictions, obj, 0, node_position, &ret); - // No update prediction cache yet. 
+ std::size_t num_new_trees = ret.size(); new_trees.push_back(std::move(ret)); - } else if (model_.learner_model_param->OutputLength() == 1) { - std::vector> ret; + if (updaters_.size() > 0 && num_new_trees == 1 && predt->predictions.Size() > 0 && + updaters_.back()->UpdatePredictionCache(p_fmat, out)) { + predt->Update(1); + } + } else if (model_.learner_model_param->OutputLength() == 1u) { + TreesOneGroup ret; BoostNewTrees(in_gpair, p_fmat, 0, &node_position, &ret); UpdateTreeLeaf(p_fmat, predt->predictions, obj, 0, node_position, &ret); const size_t num_new_trees = ret.size(); new_trees.push_back(std::move(ret)); - auto v_predt = out.Slice(linalg::All(), 0); if (updaters_.size() > 0 && num_new_trees == 1 && predt->predictions.Size() > 0 && - updaters_.back()->UpdatePredictionCache(p_fmat, v_predt)) { + updaters_.back()->UpdatePredictionCache(p_fmat, out)) { predt->Update(1); } } else { - CHECK_EQ(in_gpair->Size() % ngroup, 0U) << "must have exactly ngroup * nrow gpairs"; - HostDeviceVector tmp(in_gpair->Size() / ngroup, GradientPair(), + CHECK_EQ(in_gpair->Size() % n_groups, 0U) << "must have exactly ngroup * nrow gpairs"; + HostDeviceVector tmp(in_gpair->Size() / n_groups, GradientPair(), in_gpair->DeviceIdx()); bool update_predict = true; - for (int gid = 0; gid < ngroup; ++gid) { + for (bst_target_t gid = 0; gid < n_groups; ++gid) { node_position.clear(); - CopyGradient(in_gpair, ctx_->Threads(), ngroup, gid, &tmp); - std::vector> ret; + CopyGradient(in_gpair, ctx_->Threads(), n_groups, gid, &tmp); + TreesOneGroup ret; BoostNewTrees(&tmp, p_fmat, gid, &node_position, &ret); UpdateTreeLeaf(p_fmat, predt->predictions, obj, gid, node_position, &ret); const size_t num_new_trees = ret.size(); new_trees.push_back(std::move(ret)); - auto v_predt = out.Slice(linalg::All(), gid); + auto v_predt = out.Slice(linalg::All(), linalg::Range(gid, gid + 1)); if (!(updaters_.size() > 0 && predt->predictions.Size() > 0 && num_new_trees == 1 && updaters_.back()->UpdatePredictionCache(p_fmat, v_predt))) { update_predict = false; @@ -363,7 +365,7 @@ void GBTree::InitUpdater(Args const& cfg) { void GBTree::BoostNewTrees(HostDeviceVector* gpair, DMatrix* p_fmat, int bst_group, std::vector>* out_position, - std::vector>* ret) { + TreesOneGroup* ret) { std::vector new_trees; ret->clear(); // create the trees @@ -419,15 +421,9 @@ void GBTree::BoostNewTrees(HostDeviceVector* gpair, DMatrix* p_fma tree_param_.learning_rate = lr; } -void GBTree::CommitModel(std::vector>>&& new_trees) { +void GBTree::CommitModel(TreesOneIter&& new_trees) { monitor_.Start("CommitModel"); - if (this->model_.learner_model_param->IsVectorLeaf()) { - model_.CommitModel(std::move(new_trees[0]), 0); - } else { - for (std::uint32_t gid = 0; gid < model_.learner_model_param->OutputLength(); ++gid) { - model_.CommitModel(std::move(new_trees[gid]), gid); - } - } + model_.CommitModel(std::forward(new_trees)); monitor_.Stop("CommitModel"); } @@ -519,28 +515,32 @@ void GBTree::SaveModel(Json* p_out) const { model_.SaveModel(&model); } -void GBTree::Slice(int32_t layer_begin, int32_t layer_end, int32_t step, - GradientBooster *out, bool* out_of_bound) const { +void GBTree::Slice(bst_layer_t begin, bst_layer_t end, bst_layer_t step, GradientBooster* out, + bool* out_of_bound) const { CHECK(configured_); CHECK(out); - auto p_gbtree = dynamic_cast(out); + auto p_gbtree = dynamic_cast(out); CHECK(p_gbtree); - GBTreeModel &out_model = p_gbtree->model_; - auto layer_trees = this->LayerTrees(); - CHECK_NE(this->model_.learner_model_param->num_feature, 
0); - CHECK_NE(layer_trees, 0); + GBTreeModel& out_model = p_gbtree->model_; + CHECK(this->model_.learner_model_param->Initialized()); - layer_end = layer_end == 0 ? model_.trees.size() / layer_trees : layer_end; - CHECK_GT(layer_end, layer_begin); + end = end == 0 ? model_.BoostedRounds() : end; CHECK_GE(step, 1); - int32_t n_layers = (layer_end - layer_begin) / step; - std::vector> &out_trees = out_model.trees; - out_trees.resize(layer_trees * n_layers); - std::vector &out_trees_info = out_model.tree_info; - out_trees_info.resize(layer_trees * n_layers); - out_model.param.num_trees = out_model.trees.size(); - out_model.param.num_parallel_tree = model_.param.num_parallel_tree; + CHECK_NE(end, begin) << "Empty slice is not allowed."; + + if (step > (end - begin)) { + *out_of_bound = true; + return; + } + + auto& out_indptr = out_model.iteration_indptr; + TreesOneGroup& out_trees = out_model.trees; + std::vector& out_trees_info = out_model.tree_info; + + bst_layer_t n_layers = (end - begin) / step; + out_indptr.resize(n_layers + 1, 0); + if (!this->model_.trees_to_update.empty()) { CHECK_EQ(this->model_.trees_to_update.size(), this->model_.trees.size()) << "Not all trees are updated, " @@ -549,26 +549,31 @@ void GBTree::Slice(int32_t layer_begin, int32_t layer_end, int32_t step, "want to update a portion of trees."; } - *out_of_bound = detail::SliceTrees(layer_begin, layer_end, step, this->model_, layer_trees, - [&](auto const& in_it, auto const& out_it) { - auto new_tree = - std::make_unique(*this->model_.trees.at(in_it)); - bst_group_t group = this->model_.tree_info[in_it]; - out_trees.at(out_it) = std::move(new_tree); - out_trees_info.at(out_it) = group; - }); + *out_of_bound = + detail::SliceTrees(begin, end, step, this->model_, [&](auto in_tree_idx, auto out_l) { + auto new_tree = std::make_unique(*this->model_.trees.at(in_tree_idx)); + out_trees.emplace_back(std::move(new_tree)); + + bst_group_t group = this->model_.tree_info[in_tree_idx]; + out_trees_info.push_back(group); + + out_model.iteration_indptr[out_l + 1]++; + }); + + std::partial_sum(out_indptr.cbegin(), out_indptr.cend(), out_indptr.begin()); + CHECK_EQ(out_model.iteration_indptr.front(), 0); + + out_model.param.num_trees = out_model.trees.size(); + out_model.param.num_parallel_tree = model_.param.num_parallel_tree; } -void GBTree::PredictBatch(DMatrix* p_fmat, - PredictionCacheEntry* out_preds, - bool, - unsigned layer_begin, - unsigned layer_end) { +void GBTree::PredictBatch(DMatrix* p_fmat, PredictionCacheEntry* out_preds, bool, + bst_layer_t layer_begin, bst_layer_t layer_end) { CHECK(configured_); if (layer_end == 0) { layer_end = this->BoostedRounds(); } - if (layer_begin != 0 || layer_end < out_preds->version) { + if (layer_begin != 0 || layer_end < static_cast(out_preds->version)) { // cache is dropped. 
out_preds->version = 0; } @@ -590,8 +595,7 @@ void GBTree::PredictBatch(DMatrix* p_fmat, predictor->InitOutPredictions(p_fmat->Info(), &out_preds->predictions, model_); } - std::uint32_t tree_begin, tree_end; - std::tie(tree_begin, tree_end) = detail::LayerToTree(model_, layer_begin, layer_end); + auto [tree_begin, tree_end] = detail::LayerToTree(model_, layer_begin, layer_end); CHECK_LE(tree_end, model_.trees.size()) << "Invalid number of trees."; if (tree_end > tree_begin) { predictor->PredictBatch(p_fmat, out_preds, model_, tree_begin, tree_end); @@ -729,10 +733,9 @@ class Dart : public GBTree { auto p_dart = dynamic_cast(out); CHECK(p_dart); CHECK(p_dart->weight_drop_.empty()); - detail::SliceTrees(layer_begin, layer_end, step, model_, this->LayerTrees(), - [&](auto const& in_it, auto const&) { - p_dart->weight_drop_.push_back(this->weight_drop_.at(in_it)); - }); + detail::SliceTrees(layer_begin, layer_end, step, model_, [&](auto const& in_it, auto const&) { + p_dart->weight_drop_.push_back(this->weight_drop_.at(in_it)); + }); } void SaveModel(Json *p_out) const override { @@ -798,8 +801,7 @@ class Dart : public GBTree { predictor->InitOutPredictions(p_fmat->Info(), &p_out_preds->predictions, model_); p_out_preds->version = 0; - uint32_t tree_begin, tree_end; - std::tie(tree_begin, tree_end) = detail::LayerToTree(model_, layer_begin, layer_end); + auto [tree_begin, tree_end] = detail::LayerToTree(model_, layer_begin, layer_end); auto n_groups = model_.learner_model_param->num_output_group; PredictionCacheEntry predts; // temporary storage for prediction @@ -807,14 +809,18 @@ class Dart : public GBTree { predts.predictions.SetDevice(ctx_->gpu_id); } predts.predictions.Resize(p_fmat->Info().num_row_ * n_groups, 0); + // multi-target is not yet supported. 
+ auto layer_trees = [&]() { + return model_.param.num_parallel_tree * model_.learner_model_param->OutputLength(); + }; - for (size_t i = tree_begin; i < tree_end; i += 1) { + for (bst_tree_t i = tree_begin; i < tree_end; i += 1) { if (training && std::binary_search(idx_drop_.cbegin(), idx_drop_.cend(), i)) { continue; } CHECK_GE(i, p_out_preds->version); - auto version = i / this->LayerTrees(); + auto version = i / layer_trees(); p_out_preds->version = version; predts.predictions.Fill(0); predictor->PredictBatch(p_fmat, &predts, model_, i, i + 1); @@ -841,21 +847,17 @@ class Dart : public GBTree { } } - void PredictBatch(DMatrix* p_fmat, - PredictionCacheEntry* p_out_preds, - bool training, - unsigned layer_begin, - unsigned layer_end) override { + void PredictBatch(DMatrix* p_fmat, PredictionCacheEntry* p_out_preds, bool training, + bst_layer_t layer_begin, bst_layer_t layer_end) override { DropTrees(training); this->PredictBatchImpl(p_fmat, p_out_preds, training, layer_begin, layer_end); } void InplacePredict(std::shared_ptr p_fmat, float missing, - PredictionCacheEntry* p_out_preds, uint32_t layer_begin, - unsigned layer_end) const override { + PredictionCacheEntry* p_out_preds, bst_layer_t layer_begin, + bst_layer_t layer_end) const override { CHECK(!this->model_.learner_model_param->IsVectorLeaf()) << "dart" << MTNotImplemented(); - uint32_t tree_begin, tree_end; - std::tie(tree_begin, tree_end) = detail::LayerToTree(model_, layer_begin, layer_end); + auto [tree_begin, tree_end] = detail::LayerToTree(model_, layer_begin, layer_end); auto n_groups = model_.learner_model_param->num_output_group; std::vector predictors { @@ -897,7 +899,7 @@ class Dart : public GBTree { }; // Inplace predict is not used for training, so no need to drop tree. - for (size_t i = tree_begin; i < tree_end; ++i) { + for (bst_tree_t i = tree_begin; i < tree_end; ++i) { predict_impl(i); if (i == tree_begin) { predictor->InitOutPredictions(p_fmat->Info(), &p_out_preds->predictions, model_); @@ -941,31 +943,25 @@ class Dart : public GBTree { unsigned layer_begin, unsigned layer_end, bool approximate, int, unsigned) override { CHECK(configured_); - uint32_t tree_begin, tree_end; - std::tie(tree_begin, tree_end) = detail::LayerToTree(model_, layer_begin, layer_end); - cpu_predictor_->PredictContribution(p_fmat, out_contribs, model_, - tree_end, &weight_drop_, approximate); + auto [tree_begin, tree_end] = detail::LayerToTree(model_, layer_begin, layer_end); + cpu_predictor_->PredictContribution(p_fmat, out_contribs, model_, tree_end, &weight_drop_, + approximate); } void PredictInteractionContributions( DMatrix *p_fmat, HostDeviceVector *out_contribs, unsigned layer_begin, unsigned layer_end, bool approximate) override { CHECK(configured_); - uint32_t tree_begin, tree_end; - std::tie(tree_begin, tree_end) = detail::LayerToTree(model_, layer_begin, layer_end); + auto [tree_begin, tree_end] = detail::LayerToTree(model_, layer_begin, layer_end); cpu_predictor_->PredictInteractionContributions(p_fmat, out_contribs, model_, tree_end, &weight_drop_, approximate); } protected: // commit new trees all at once - void CommitModel(std::vector>>&& new_trees) override { - int num_new_trees = 0; - for (uint32_t gid = 0; gid < model_.learner_model_param->num_output_group; ++gid) { - num_new_trees += new_trees[gid].size(); - model_.CommitModel(std::move(new_trees[gid]), gid); - } - size_t num_drop = NormalizeTrees(num_new_trees); + void CommitModel(TreesOneIter&& new_trees) override { + auto n_new_trees = 
model_.CommitModel(std::forward(new_trees)); + size_t num_drop = NormalizeTrees(n_new_trees); LOG(INFO) << "drop " << num_drop << " trees, " << "weight = " << weight_drop_.back(); } diff --git a/src/gbm/gbtree.h b/src/gbm/gbtree.h index b64532c61..6e7da77ac 100644 --- a/src/gbm/gbtree.h +++ b/src/gbm/gbtree.h @@ -139,23 +139,13 @@ struct DartTrainParam : public XGBoostParameter { namespace detail { // From here on, layer becomes concrete trees. -inline std::pair LayerToTree(gbm::GBTreeModel const& model, - std::uint32_t layer_begin, - std::uint32_t layer_end) { - std::uint32_t tree_begin; - std::uint32_t tree_end; - if (model.learner_model_param->IsVectorLeaf()) { - tree_begin = layer_begin * model.param.num_parallel_tree; - tree_end = layer_end * model.param.num_parallel_tree; - } else { - bst_group_t groups = model.learner_model_param->OutputLength(); - tree_begin = layer_begin * groups * model.param.num_parallel_tree; - tree_end = layer_end * groups * model.param.num_parallel_tree; - } - - if (tree_end == 0) { - tree_end = model.trees.size(); - } +inline std::pair LayerToTree(gbm::GBTreeModel const& model, + bst_layer_t begin, bst_layer_t end) { + CHECK(!model.iteration_indptr.empty()); + end = end == 0 ? model.BoostedRounds() : end; + CHECK_LE(end, model.BoostedRounds()) << "Out of range for tree layers."; + bst_tree_t tree_begin = model.iteration_indptr[begin]; + bst_tree_t tree_end = model.iteration_indptr[end]; if (model.trees.size() != 0) { CHECK_LE(tree_begin, tree_end); } @@ -164,27 +154,33 @@ inline std::pair LayerToTree(gbm::GBTreeModel const& model, // Call fn for each pair of input output tree. Return true if index is out of bound. template -bool SliceTrees(int32_t layer_begin, int32_t layer_end, int32_t step, GBTreeModel const& model, - uint32_t layer_trees, Func fn) { - uint32_t tree_begin, tree_end; - std::tie(tree_begin, tree_end) = detail::LayerToTree(model, layer_begin, layer_end); - if (tree_end > model.trees.size()) { +bool SliceTrees(bst_layer_t begin, bst_layer_t end, bst_layer_t step, GBTreeModel const& model, + Func&& fn) { + end = end == 0 ? model.iteration_indptr.size() : end; + CHECK_GE(step, 1); + if (step > end - begin) { + return true; + } + if (end > model.BoostedRounds()) { return true; } - layer_end = layer_end == 0 ? model.trees.size() / layer_trees : layer_end; - uint32_t n_layers = (layer_end - layer_begin) / step; - int32_t in_it = tree_begin; - int32_t out_it = 0; - for (uint32_t l = 0; l < n_layers; ++l) { - for (uint32_t i = 0; i < layer_trees; ++i) { - CHECK_LT(in_it, tree_end); - fn(in_it, out_it); - out_it++; - in_it++; + bst_layer_t n_layers = (end - begin) / step; + bst_layer_t out_l = 0; + + for (bst_layer_t l = begin; l < end; l += step) { + auto [tree_begin, tree_end] = detail::LayerToTree(model, l, l + 1); + if (tree_end > static_cast(model.trees.size())) { + return true; } - in_it += (step - 1) * layer_trees; + + for (bst_tree_t tree_idx = tree_begin; tree_idx < tree_end; ++tree_idx) { + fn(tree_idx, out_l); + } + ++out_l; } + + CHECK_EQ(out_l, n_layers); return false; } } // namespace detail @@ -241,37 +237,22 @@ class GBTree : public GradientBooster { void SaveModel(Json* p_out) const override; void LoadModel(Json const& in) override; - // Number of trees per layer. 
- [[nodiscard]] std::uint32_t LayerTrees() const { - if (model_.learner_model_param->IsVectorLeaf()) { - return model_.param.num_parallel_tree; - } - return model_.param.num_parallel_tree * model_.learner_model_param->OutputLength(); - } - // slice the trees, out must be already allocated - void Slice(int32_t layer_begin, int32_t layer_end, int32_t step, - GradientBooster *out, bool* out_of_bound) const override; - - [[nodiscard]] std::int32_t BoostedRounds() const override { - CHECK_NE(model_.param.num_parallel_tree, 0); - CHECK_NE(model_.learner_model_param->num_output_group, 0); - - return model_.trees.size() / this->LayerTrees(); - } + void Slice(bst_layer_t begin, bst_layer_t end, bst_layer_t step, GradientBooster* out, + bool* out_of_bound) const override; + [[nodiscard]] std::int32_t BoostedRounds() const override { return this->model_.BoostedRounds(); } [[nodiscard]] bool ModelFitted() const override { return !model_.trees.empty() || !model_.trees_to_update.empty(); } - void PredictBatch(DMatrix *p_fmat, PredictionCacheEntry *out_preds, - bool training, unsigned layer_begin, unsigned layer_end) override; + void PredictBatch(DMatrix* p_fmat, PredictionCacheEntry* out_preds, bool training, + bst_layer_t layer_begin, bst_layer_t layer_end) override; void InplacePredict(std::shared_ptr p_m, float missing, PredictionCacheEntry* out_preds, - uint32_t layer_begin, unsigned layer_end) const override { + bst_layer_t layer_begin, bst_layer_t layer_end) const override { CHECK(configured_); - uint32_t tree_begin, tree_end; - std::tie(tree_begin, tree_end) = detail::LayerToTree(model_, layer_begin, layer_end); + auto [tree_begin, tree_end] = detail::LayerToTree(model_, layer_begin, layer_end); CHECK_LE(tree_end, model_.trees.size()) << "Invalid number of trees."; std::vector predictors{ cpu_predictor_.get(), @@ -364,20 +345,18 @@ class GBTree : public GradientBooster { } } - void PredictInstance(const SparsePage::Inst& inst, - std::vector* out_preds, + void PredictInstance(const SparsePage::Inst& inst, std::vector* out_preds, uint32_t layer_begin, uint32_t layer_end) override { CHECK(configured_); - uint32_t tree_begin, tree_end; - std::tie(tree_begin, tree_end) = detail::LayerToTree(model_, layer_begin, layer_end); + std::uint32_t _, tree_end; + std::tie(_, tree_end) = detail::LayerToTree(model_, layer_begin, layer_end); cpu_predictor_->PredictInstance(inst, out_preds, model_, tree_end); } void PredictLeaf(DMatrix* p_fmat, HostDeviceVector* out_preds, uint32_t layer_begin, uint32_t layer_end) override { - uint32_t tree_begin, tree_end; - std::tie(tree_begin, tree_end) = detail::LayerToTree(model_, layer_begin, layer_end); + auto [tree_begin, tree_end] = detail::LayerToTree(model_, layer_begin, layer_end); CHECK_EQ(tree_begin, 0) << "Predict leaf supports only iteration end: (0, " "n_iteration), use model slicing instead."; this->GetPredictor()->PredictLeaf(p_fmat, out_preds, model_, tree_end); @@ -388,8 +367,7 @@ class GBTree : public GradientBooster { uint32_t layer_begin, uint32_t layer_end, bool approximate, int, unsigned) override { CHECK(configured_); - uint32_t tree_begin, tree_end; - std::tie(tree_begin, tree_end) = detail::LayerToTree(model_, layer_begin, layer_end); + auto [tree_begin, tree_end] = detail::LayerToTree(model_, layer_begin, layer_end); CHECK_EQ(tree_begin, 0) << "Predict contribution supports only iteration end: (0, " "n_iteration), using model slicing instead."; @@ -401,8 +379,7 @@ class GBTree : public GradientBooster { DMatrix *p_fmat, HostDeviceVector *out_contribs, 
                                           uint32_t layer_begin, uint32_t layer_end, bool approximate) override {
     CHECK(configured_);
-    uint32_t tree_begin, tree_end;
-    std::tie(tree_begin, tree_end) = detail::LayerToTree(model_, layer_begin, layer_end);
+    auto [tree_begin, tree_end] = detail::LayerToTree(model_, layer_begin, layer_end);
     CHECK_EQ(tree_begin, 0) << "Predict interaction contribution supports only iteration end: (0, "
                                "n_iteration), use model slicing instead.";
@@ -427,7 +404,7 @@ class GBTree : public GradientBooster {
                               DMatrix* f_dmat = nullptr) const;

   // commit new trees all at once
-  virtual void CommitModel(std::vector<std::vector<std::unique_ptr<RegTree>>>&& new_trees);
+  virtual void CommitModel(TreesOneIter&& new_trees);

   // --- data structure ---
   GBTreeModel model_;
diff --git a/src/gbm/gbtree_model.cc b/src/gbm/gbtree_model.cc
index 4e9cc6655..1373e3e2b 100644
--- a/src/gbm/gbtree_model.cc
+++ b/src/gbm/gbtree_model.cc
@@ -1,15 +1,55 @@
-/*!
- * Copyright 2019-2022 by Contributors
+/**
+ * Copyright 2019-2023, XGBoost Contributors
  */
-#include <utility>
-
-#include "xgboost/json.h"
-#include "xgboost/logging.h"
 #include "gbtree_model.h"
-#include "gbtree.h"
-
-namespace xgboost {
-namespace gbm {
+
+#include <algorithm>  // for transform, max_element
+#include <cstddef>    // for size_t
+#include <numeric>    // for partial_sum
+#include <ostream>    // for operator<<, basic_ostream
+#include <utility>    // for move, pair
+
+#include "../common/threading_utils.h"  // for ParallelFor
+#include "dmlc/base.h"                  // for BeginPtr
+#include "dmlc/io.h"                    // for Stream
+#include "xgboost/context.h"            // for Context
+#include "xgboost/json.h"               // for Json, get, Integer, Array, FromJson, ToJson, Json...
+#include "xgboost/learner.h"            // for LearnerModelParam
+#include "xgboost/logging.h"            // for LogCheck_EQ, CHECK_EQ, CHECK
+#include "xgboost/tree_model.h"         // for RegTree
+
+namespace xgboost::gbm {
+namespace {
+// For creating the tree indptr from old models.
+void MakeIndptr(GBTreeModel* out_model) {
+  auto const& tree_info = out_model->tree_info;
+  if (tree_info.empty()) {
+    return;
+  }
+
+  auto n_groups = *std::max_element(tree_info.cbegin(), tree_info.cend()) + 1;
+
+  auto& indptr = out_model->iteration_indptr;
+  auto layer_trees = out_model->param.num_parallel_tree * n_groups;
+  CHECK_NE(layer_trees, 0);
+  indptr.resize(out_model->param.num_trees / layer_trees + 1, 0);
+  indptr[0] = 0;
+
+  for (std::size_t i = 1; i < indptr.size(); ++i) {
+    indptr[i] = n_groups * out_model->param.num_parallel_tree;
+  }
+  std::partial_sum(indptr.cbegin(), indptr.cend(), indptr.begin());
+}
+
+// Validate the consistency of the model.
+void Validate(GBTreeModel const& model) {
+  CHECK_EQ(model.trees.size(), model.param.num_trees);
+  CHECK_EQ(model.tree_info.size(), model.param.num_trees);
+  // True even if the model is empty since we should always have 0 as the first element.
+  CHECK_EQ(model.iteration_indptr.back(), model.param.num_trees);
+}
+}  // namespace
+
 void GBTreeModel::Save(dmlc::Stream* fo) const {
   CHECK_EQ(param.num_trees, static_cast<int>(trees.size()));
@@ -61,6 +101,9 @@ void GBTreeModel::Load(dmlc::Stream* fi) {
       }
     }
   }
+
+  MakeIndptr(this);
+  Validate(*this);
 }

 void GBTreeModel::SaveModel(Json* p_out) const {
@@ -72,10 +115,10 @@ void GBTreeModel::SaveModel(Json* p_out) const {
   CHECK(ctx_);
   common::ParallelFor(trees.size(), ctx_->Threads(), [&](auto t) {
     auto const& tree = trees[t];
-    Json tree_json{Object()};
-    tree->SaveModel(&tree_json);
-    tree_json["id"] = Integer{static_cast<Integer::Int>(t)};
-    trees_json[t] = std::move(tree_json);
+    Json jtree{Object{}};
+    tree->SaveModel(&jtree);
+    jtree["id"] = Integer{static_cast<Integer::Int>(t)};
+    trees_json[t] = std::move(jtree);
   });

   std::vector<Json> tree_info_json(tree_info.size());
@@ -85,6 +128,11 @@ void GBTreeModel::SaveModel(Json* p_out) const {
   out["trees"] = Array(std::move(trees_json));
   out["tree_info"] = Array(std::move(tree_info_json));
+
+  std::vector<Json> jiteration_indptr(iteration_indptr.size());
+  std::transform(iteration_indptr.cbegin(), iteration_indptr.cend(), jiteration_indptr.begin(),
+                 [](bst_tree_t i) { return Integer{i}; });
+  out["iteration_indptr"] = Array{std::move(jiteration_indptr)};
 }

 void GBTreeModel::LoadModel(Json const& in) {
@@ -93,22 +141,59 @@ void GBTreeModel::LoadModel(Json const& in) {
   trees.clear();
   trees_to_update.clear();

+  auto const& jmodel = get<Object const>(in);
+
   auto const& trees_json = get<Array const>(in["trees"]);
-  trees.resize(trees_json.size());
+  CHECK_EQ(trees_json.size(), param.num_trees);
+  trees.resize(param.num_trees);
+
+  auto const& tree_info_json = get<Array const>(in["tree_info"]);
+  CHECK_EQ(tree_info_json.size(), param.num_trees);
+  tree_info.resize(param.num_trees);

   CHECK(ctx_);
-  common::ParallelFor(trees_json.size(), ctx_->Threads(), [&](auto t) {
-    auto tree_id = get<Integer const>(trees_json[t]["id"]);
-    trees.at(tree_id).reset(new RegTree());
-    trees.at(tree_id)->LoadModel(trees_json[t]);
+
+  common::ParallelFor(param.num_trees, ctx_->Threads(), [&](auto t) {
+    auto tree_id = get<Integer const>(trees_json[t]["id"]);
+    trees.at(tree_id).reset(new RegTree{});
+    trees[tree_id]->LoadModel(trees_json[t]);
   });
-  tree_info.resize(param.num_trees);
-  auto const& tree_info_json = get<Array const>(in["tree_info"]);
-  for (int32_t i = 0; i < param.num_trees; ++i) {
+  for (bst_tree_t i = 0; i < param.num_trees; ++i) {
     tree_info[i] = get<Integer const>(tree_info_json[i]);
   }
+
+  auto indptr_it = jmodel.find("iteration_indptr");
+  iteration_indptr.clear();
+  if (indptr_it != jmodel.cend()) {
+    auto const& vec = get<Array const>(indptr_it->second);
+    iteration_indptr.resize(vec.size());
+    std::transform(vec.cbegin(), vec.cend(), iteration_indptr.begin(),
+                   [](Json const& v) { return get<Integer const>(v); });
+  } else {
+    MakeIndptr(this);
+  }
+
+  Validate(*this);
 }
-}  // namespace gbm
-}  // namespace xgboost
+
+bst_tree_t GBTreeModel::CommitModel(TreesOneIter&& new_trees) {
+  CHECK(!iteration_indptr.empty());
+  CHECK_EQ(iteration_indptr.back(), param.num_trees);
+  bst_tree_t n_new_trees{0};
+
+  if (learner_model_param->IsVectorLeaf()) {
+    n_new_trees += new_trees.front().size();
+    this->CommitModelGroup(std::move(new_trees.front()), 0);
+  } else {
+    for (bst_target_t gidx{0}; gidx < learner_model_param->OutputLength(); ++gidx) {
+      n_new_trees += new_trees[gidx].size();
+      this->CommitModelGroup(std::move(new_trees[gidx]), gidx);
+    }
+  }
+
+  iteration_indptr.push_back(n_new_trees + iteration_indptr.back());
+  Validate(*this);
+  return n_new_trees;
+}
+}  // namespace xgboost::gbm
diff --git a/src/gbm/gbtree_model.h b/src/gbm/gbtree_model.h
index 1f2bdfa63..32fa86863 100644
--- a/src/gbm/gbtree_model.h
+++ b/src/gbm/gbtree_model.h
@@ -1,5 +1,5 @@
-/*!
- * Copyright 2017-2020 by Contributors
+/**
+ * Copyright 2017-2023, XGBoost Contributors
  * \file gbtree_model.h
  */
 #ifndef XGBOOST_GBM_GBTREE_MODEL_H_
@@ -25,26 +25,28 @@ namespace xgboost {
 class Json;

 namespace gbm {
+/**
+ * \brief Container for all trees built (not updated) for one group.
+ */
+using TreesOneGroup = std::vector<std::unique_ptr<RegTree>>;
+/**
+ * \brief Container for all trees built (not updated) for one iteration.
+ */
+using TreesOneIter = std::vector<TreesOneGroup>;

 /*! \brief model parameters */
 struct GBTreeModelParam : public dmlc::Parameter<GBTreeModelParam> {
  public:
-  /*! \brief number of trees */
-  int32_t num_trees;
-  /*! \brief (Deprecated) number of roots */
-  int32_t num_parallel_tree;
-  /*! \brief number of features to be used by trees */
-  int32_t deprecated_num_feature;
-  /*! \brief pad this space, for backward compatibility reason.*/
-  int32_t pad_32bit;
-  /*! \brief deprecated padding space. */
-  int64_t deprecated_num_pbuffer;
-  // deprecated. use learner_model_param_->num_output_group.
-  int32_t deprecated_num_output_group;
-  /*! \brief size of leaf vector needed in tree */
-  int32_t size_leaf_vector;
+  /**
+   * \brief number of trees
+   */
+  std::int32_t num_trees;
+  /**
+   * \brief Number of trees for a forest.
+   */
+  std::int32_t num_parallel_tree;
   /*! \brief reserved parameters */
-  int32_t reserved[32];
+  int32_t reserved[38];
   /*! \brief constructor */
   GBTreeModelParam() {
@@ -66,23 +68,14 @@ struct GBTreeModelParam : public dmlc::Parameter<GBTreeModelParam> {
         .describe(
             "Number of parallel trees constructed during each iteration."
             " This option is used to support boosted random forest.");
-    DMLC_DECLARE_FIELD(size_leaf_vector)
-        .set_lower_bound(0)
-        .set_default(0)
-        .describe("Reserved option for vector tree.");
   }

   // Swap byte order for all fields. Useful for transporting models between machines with different
  // endianness (big endian vs little endian)
-  inline GBTreeModelParam ByteSwap() const {
+  GBTreeModelParam ByteSwap() const {
     GBTreeModelParam x = *this;
     dmlc::ByteSwap(&x.num_trees, sizeof(x.num_trees), 1);
     dmlc::ByteSwap(&x.num_parallel_tree, sizeof(x.num_parallel_tree), 1);
-    dmlc::ByteSwap(&x.deprecated_num_feature, sizeof(x.deprecated_num_feature), 1);
-    dmlc::ByteSwap(&x.pad_32bit, sizeof(x.pad_32bit), 1);
-    dmlc::ByteSwap(&x.deprecated_num_pbuffer, sizeof(x.deprecated_num_pbuffer), 1);
-    dmlc::ByteSwap(&x.deprecated_num_output_group, sizeof(x.deprecated_num_output_group), 1);
-    dmlc::ByteSwap(&x.size_leaf_vector, sizeof(x.size_leaf_vector), 1);
     dmlc::ByteSwap(x.reserved, sizeof(x.reserved[0]), sizeof(x.reserved) / sizeof(x.reserved[0]));
     return x;
   }
@@ -107,6 +100,9 @@ struct GBTreeModel : public Model {
       trees.clear();
       param.num_trees = 0;
       tree_info.clear();
+
+      iteration_indptr.clear();
+      iteration_indptr.push_back(0);
     }
   }

@@ -116,22 +112,35 @@ struct GBTreeModel : public Model {
   void SaveModel(Json* p_out) const override;
   void LoadModel(Json const& p_out) override;

-  std::vector<std::string> DumpModel(const FeatureMap& fmap, bool with_stats, int32_t n_threads,
-                                     std::string format) const {
+  [[nodiscard]] std::vector<std::string> DumpModel(const FeatureMap& fmap, bool with_stats,
+                                                   int32_t n_threads, std::string format) const {
     std::vector<std::string> dump(trees.size());
     common::ParallelFor(trees.size(), n_threads, [&](size_t i) {
       dump[i] = trees[i]->DumpModel(fmap, with_stats, format);
     });
     return dump;
   }
-  void CommitModel(std::vector<std::unique_ptr<RegTree> >&& new_trees,
-                   int bst_group) {
-    for (auto & new_tree : new_trees) {
+  /**
+   * \brief Add trees to the model.
+   *
+   * \return The number of new trees.
+   */
+  bst_tree_t CommitModel(TreesOneIter&& new_trees);
+
+  void CommitModelGroup(std::vector<std::unique_ptr<RegTree>>&& new_trees, bst_target_t group_idx) {
+    for (auto& new_tree : new_trees) {
       trees.push_back(std::move(new_tree));
-      tree_info.push_back(bst_group);
+      tree_info.push_back(group_idx);
     }
     param.num_trees += static_cast<int>(new_trees.size());
   }

+  [[nodiscard]] std::int32_t BoostedRounds() const {
+    if (trees.empty()) {
+      CHECK_EQ(iteration_indptr.size(), 1);
+    }
+    return static_cast<std::int32_t>(iteration_indptr.size() - 1);
+  }
+
   // base margin
   LearnerModelParam const* learner_model_param;
   // model parameter
@@ -140,10 +149,19 @@ struct GBTreeModel : public Model {
   std::vector<std::unique_ptr<RegTree> > trees;
   /*! \brief for the update process, a place to keep the initial trees */
   std::vector<std::unique_ptr<RegTree> > trees_to_update;
-  /*! \brief some information indicator of the tree, reserved */
+  /**
+   * \brief Group index for trees.
+   */
   std::vector<int> tree_info;
+  /**
+   * \brief Number of trees accumulated for each iteration.
+   */
+  std::vector<bst_tree_t> iteration_indptr{0};

  private:
+  /**
+   * \brief Context for the model, used for threaded IO.
+   */
   Context const* ctx_;
 };
 }  // namespace gbm
diff --git a/src/learner.cc b/src/learner.cc
index 50d54c9fc..8808c3392 100644
--- a/src/learner.cc
+++ b/src/learner.cc
@@ -45,7 +45,7 @@
 #include "common/timer.h"    // for Monitor
 #include "common/version.h"  // for Version
 #include "dmlc/endian.h"     // for ByteSwap, DMLC_IO_NO_ENDIAN_SWAP
-#include "xgboost/base.h"    // for Args, bst_float, GradientPair, bst_feature_t
+#include "xgboost/base.h"    // for Args, bst_float, GradientPair, bst_feature_t, ...
#include "xgboost/context.h" // for Context #include "xgboost/data.h" // for DMatrix, MetaInfo #include "xgboost/gbm.h" // for GradientBooster @@ -1247,19 +1247,19 @@ class LearnerImpl : public LearnerIO { return gbm_->DumpModel(fmap, with_stats, format); } - Learner* Slice(int32_t begin_layer, int32_t end_layer, int32_t step, + Learner* Slice(bst_layer_t begin, bst_layer_t end, bst_layer_t step, bool* out_of_bound) override { this->Configure(); this->CheckModelInitialized(); CHECK_NE(this->learner_model_param_.num_feature, 0); - CHECK_GE(begin_layer, 0); + CHECK_GE(begin, 0); auto* out_impl = new LearnerImpl({}); out_impl->learner_model_param_.Copy(this->learner_model_param_); out_impl->ctx_ = this->ctx_; auto gbm = std::unique_ptr(GradientBooster::Create( this->tparam_.booster, &out_impl->ctx_, &out_impl->learner_model_param_)); - this->gbm_->Slice(begin_layer, end_layer, step, gbm.get(), out_of_bound); + this->gbm_->Slice(begin, end, step, gbm.get(), out_of_bound); out_impl->gbm_ = std::move(gbm); Json config{Object()}; diff --git a/src/predictor/cpu_predictor.cc b/src/predictor/cpu_predictor.cc index 3d5dfbd67..fe6fea02f 100644 --- a/src/predictor/cpu_predictor.cc +++ b/src/predictor/cpu_predictor.cc @@ -287,7 +287,6 @@ void PredictBatchByBlockOfRowsKernel(DataView batch, gbm::GBTreeModel const &mod linalg::TensorView out_predt) { auto &thread_temp = *p_thread_temp; - CHECK_EQ(model.param.size_leaf_vector, 0) << "size_leaf_vector is enforced to 0 so far"; // parallel over local batch const auto nsize = static_cast(batch.Size()); const int num_feature = model.learner_model_param->num_feature; @@ -515,7 +514,6 @@ class ColumnSplitHelper { void PredictBatchKernel(DataView batch, std::vector *out_preds) { auto const num_group = model_.learner_model_param->num_output_group; - CHECK_EQ(model_.param.size_leaf_vector, 0) << "size_leaf_vector is enforced to 0 so far"; // parallel over local batch auto const nsize = batch.Size(); auto const num_feature = model_.learner_model_param->num_feature; @@ -736,8 +734,7 @@ class CPUPredictor : public Predictor { if (ntree_limit == 0 || ntree_limit > model.trees.size()) { ntree_limit = static_cast(model.trees.size()); } - out_preds->resize(model.learner_model_param->num_output_group * - (model.param.size_leaf_vector + 1)); + out_preds->resize(model.learner_model_param->num_output_group); auto base_score = model.learner_model_param->BaseScore(ctx_)(0); // loop over output groups for (uint32_t gid = 0; gid < model.learner_model_param->num_output_group; ++gid) { diff --git a/src/predictor/gpu_predictor.cu b/src/predictor/gpu_predictor.cu index 4a5c5b104..2439a277f 100644 --- a/src/predictor/gpu_predictor.cu +++ b/src/predictor/gpu_predictor.cu @@ -342,7 +342,6 @@ class DeviceModel { void Init(const gbm::GBTreeModel& model, size_t tree_begin, size_t tree_end, int32_t gpu_id) { dh::safe_cuda(cudaSetDevice(gpu_id)); - CHECK_EQ(model.param.size_leaf_vector, 0); // Copy decision trees to device tree_segments = std::move(HostDeviceVector({}, gpu_id)); auto& h_tree_segments = tree_segments.HostVector(); diff --git a/src/tree/hist/evaluate_splits.h b/src/tree/hist/evaluate_splits.h index 925a5fb76..0a79fbebc 100644 --- a/src/tree/hist/evaluate_splits.h +++ b/src/tree/hist/evaluate_splits.h @@ -677,9 +677,6 @@ template void UpdatePredictionCacheImpl(Context const *ctx, RegTree const *p_last_tree, std::vector const &partitioner, linalg::VectorView out_preds) { - CHECK_GT(out_preds.Size(), 0U); - - CHECK(p_last_tree); auto const &tree = *p_last_tree; 
   CHECK_EQ(out_preds.DeviceIdx(), Context::kCpuId);

   size_t n_nodes = p_last_tree->GetNodes().size();
@@ -687,7 +684,7 @@ void UpdatePredictionCacheImpl(Context const *ctx, RegTree const *p_last_tree,
     CHECK_EQ(part.Size(), n_nodes);
     common::BlockedSpace2d space(
         part.Size(), [&](size_t node) { return part[node].Size(); }, 1024);
-    common::ParallelFor2d(space, ctx->Threads(), [&](size_t nidx, common::Range1d r) {
+    common::ParallelFor2d(space, ctx->Threads(), [&](bst_node_t nidx, common::Range1d r) {
       if (!tree[nidx].IsDeleted() && tree[nidx].IsLeaf()) {
         auto const &rowset = part[nidx];
         auto leaf_value = tree[nidx].LeafValue();
@@ -698,5 +695,42 @@ void UpdatePredictionCacheImpl(Context const *ctx, RegTree const *p_last_tree,
     });
   }
 }
+
+template <typename Partitioner>
+void UpdatePredictionCacheImpl(Context const *ctx, RegTree const *p_last_tree,
+                               std::vector<Partitioner> const &partitioner,
+                               linalg::MatrixView<float> out_preds) {
+  CHECK_GT(out_preds.Size(), 0U);
+  CHECK(p_last_tree);
+
+  auto const &tree = *p_last_tree;
+  if (!tree.IsMultiTarget()) {
+    UpdatePredictionCacheImpl(ctx, p_last_tree, partitioner, out_preds.Slice(linalg::All(), 0));
+    return;
+  }
+
+  auto const *mttree = tree.GetMultiTargetTree();
+  auto n_nodes = mttree->Size();
+  auto n_targets = tree.NumTargets();
+  CHECK_EQ(out_preds.Shape(1), n_targets);
+  CHECK_EQ(out_preds.DeviceIdx(), Context::kCpuId);
+
+  for (auto &part : partitioner) {
+    CHECK_EQ(part.Size(), n_nodes);
+    common::BlockedSpace2d space(
+        part.Size(), [&](size_t node) { return part[node].Size(); }, 1024);
+    common::ParallelFor2d(space, ctx->Threads(), [&](bst_node_t nidx, common::Range1d r) {
+      if (tree.IsLeaf(nidx)) {
+        auto const &rowset = part[nidx];
+        auto leaf_value = mttree->LeafValue(nidx);
+        for (std::size_t const *it = rowset.begin + r.begin(); it < rowset.begin + r.end(); ++it) {
+          for (std::size_t i = 0; i < n_targets; ++i) {
+            out_preds(*it, i) += leaf_value(i);
+          }
+        }
+      }
+    });
+  }
+}
 }  // namespace xgboost::tree
 #endif  // XGBOOST_TREE_HIST_EVALUATE_SPLITS_H_
diff --git a/src/tree/updater_approx.cc b/src/tree/updater_approx.cc
index fd636d3a3..d6bc23f44 100644
--- a/src/tree/updater_approx.cc
+++ b/src/tree/updater_approx.cc
@@ -116,7 +116,7 @@ class GloablApproxBuilder {
     return nodes.front();
   }

-  void UpdatePredictionCache(DMatrix const *data, linalg::VectorView<float> out_preds) const {
+  void UpdatePredictionCache(DMatrix const *data, linalg::MatrixView<float> out_preds) const {
     monitor_->Start(__func__);
     // Caching prediction seems redundant for approx tree method, as sketching takes up
     // majority of training time.
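The multi-target overload of UpdatePredictionCacheImpl added above walks each leaf's row set and adds that leaf's vector value into the cached prediction matrix, one entry per target. Below is a minimal standalone sketch of the same accumulation; a NumPy array stands in for linalg::MatrixView<float> and hard-coded row sets stand in for the partitioner, so every name here is illustrative rather than part of the patch.

import numpy as np

# rowsets[nidx] lists the rows that landed in leaf nidx; leaf_values[nidx]
# holds that leaf's vector value, one entry per target.  out_preds plays the
# role of the (n_rows x n_targets) prediction cache.
n_rows, n_targets = 4, 2
out_preds = np.zeros((n_rows, n_targets), dtype=np.float32)

rowsets = [[0, 2], [1, 3]]
leaf_values = np.array([[0.5, -1.0], [2.0, 0.25]], dtype=np.float32)

for nidx, rows in enumerate(rowsets):
    for row in rows:
        out_preds[row, :] += leaf_values[nidx]  # one add per target, as in the C++ loop

print(out_preds)

A single-target tree is just the degenerate case with n_targets equal to 1, which is why the C++ overload can dispatch back to the vector version for non-multi-target trees.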
@@ -303,7 +303,7 @@ class GlobalApproxUpdater : public TreeUpdater {
     }
   }

-  bool UpdatePredictionCache(const DMatrix *data, linalg::VectorView<float> out_preds) override {
+  bool UpdatePredictionCache(const DMatrix *data, linalg::MatrixView<float> out_preds) override {
     if (data != cached_ || !pimpl_) {
       return false;
     }
diff --git a/src/tree/updater_gpu_hist.cu b/src/tree/updater_gpu_hist.cu
index 54ff7ea1a..2d1b7a24d 100644
--- a/src/tree/updater_gpu_hist.cu
+++ b/src/tree/updater_gpu_hist.cu
@@ -517,7 +517,7 @@ struct GPUHistMakerDevice {
     });
   }

-  bool UpdatePredictionCache(linalg::VectorView<float> out_preds_d, RegTree const* p_tree) {
+  bool UpdatePredictionCache(linalg::MatrixView<float> out_preds_d, RegTree const* p_tree) {
     if (positions.empty()) {
       return false;
     }
@@ -535,11 +535,12 @@ struct GPUHistMakerDevice {
                               h_nodes.size() * sizeof(RegTree::Node), cudaMemcpyHostToDevice,
                               ctx_->CUDACtx()->Stream()));
     auto d_nodes = dh::ToSpan(nodes);
+    CHECK_EQ(out_preds_d.Shape(1), 1);
     dh::LaunchN(d_position.size(), ctx_->CUDACtx()->Stream(),
                 [=] XGBOOST_DEVICE(std::size_t idx) mutable {
                   bst_node_t nidx = d_position[idx];
                   auto weight = d_nodes[nidx].LeafValue();
-                  out_preds_d(idx) += weight;
+                  out_preds_d(idx, 0) += weight;
                 });
     return true;
   }
@@ -858,7 +859,7 @@ class GPUHistMaker : public TreeUpdater {
   }

   bool UpdatePredictionCache(const DMatrix* data,
-                             linalg::VectorView<float> p_out_preds) override {
+                             linalg::MatrixView<float> p_out_preds) override {
     if (maker == nullptr || p_last_fmat_ == nullptr || p_last_fmat_ != data) {
       return false;
     }
diff --git a/src/tree/updater_quantile_hist.cc b/src/tree/updater_quantile_hist.cc
index 012b8e781..8387177aa 100644
--- a/src/tree/updater_quantile_hist.cc
+++ b/src/tree/updater_quantile_hist.cc
@@ -125,6 +125,7 @@ class MultiTargetHistBuilder {
   std::vector<CommonRowPartitioner> partitioner_;
   // Pointer to last updated tree, used for update prediction cache.
   RegTree const *p_last_tree_{nullptr};
+  DMatrix const *p_last_fmat_{nullptr};

   ObjInfo const *task_{nullptr};
@@ -147,6 +148,7 @@
   void InitData(DMatrix *p_fmat, RegTree const *p_tree) {
     monitor_->Start(__func__);
+    p_last_fmat_ = p_fmat;
     std::size_t page_id = 0;
     bst_bin_t n_total_bins = 0;
     partitioner_.clear();
@@ -312,6 +314,19 @@
         task_{task} {
     monitor_->Init(__func__);
   }
+
+  bool UpdatePredictionCache(DMatrix const *data, linalg::MatrixView<float> out_preds) const {
+    // p_last_fmat_ is a valid pointer as long as UpdatePredictionCache() is called in
+    // conjunction with Update().
+    if (!p_last_fmat_ || !p_last_tree_ || data != p_last_fmat_) {
+      return false;
+    }
+    monitor_->Start(__func__);
+    CHECK_EQ(out_preds.Size(), data->Info().num_row_ * p_last_tree_->NumTargets());
+    UpdatePredictionCacheImpl(ctx_, p_last_tree_, partitioner_, out_preds);
+    monitor_->Stop(__func__);
+    return true;
+  }
 };

 class HistBuilder {
@@ -347,7 +362,7 @@ class HistBuilder {
     monitor_->Init(__func__);
   }

-  bool UpdatePredictionCache(DMatrix const *data, linalg::VectorView<float> out_preds) const {
+  bool UpdatePredictionCache(DMatrix const *data, linalg::MatrixView<float> out_preds) const {
     // p_last_fmat_ is a valid pointer as long as UpdatePredictionCache() is called in
     // conjunction with Update().
     if (!p_last_fmat_ || !p_last_tree_ || data != p_last_fmat_) {
@@ -582,12 +597,11 @@ class QuantileHistMaker : public TreeUpdater {
     }
   }

-  bool UpdatePredictionCache(const DMatrix *data, linalg::VectorView<float> out_preds) override {
+  bool UpdatePredictionCache(const DMatrix *data, linalg::MatrixView<float> out_preds) override {
     if (p_impl_) {
       return p_impl_->UpdatePredictionCache(data, out_preds);
     } else if (p_mtimpl_) {
-      // Not yet supported.
-      return false;
+      return p_mtimpl_->UpdatePredictionCache(data, out_preds);
     } else {
       return false;
     }
diff --git a/tests/cpp/common/test_json.cc b/tests/cpp/common/test_json.cc
index cf8bcd81d..3e2038e13 100644
--- a/tests/cpp/common/test_json.cc
+++ b/tests/cpp/common/test_json.cc
@@ -1,5 +1,5 @@
-/*!
- * Copyright (c) by Contributors 2019-2022
+/**
+ * Copyright (c) 2019-2023, XGBoost Contributors
  */
 #include <gtest/gtest.h>
@@ -8,7 +8,8 @@
 #include "../../../src/common/charconv.h"
 #include "../../../src/common/io.h"
-#include "../filesystem.h"  // dmlc::TemporaryDirectory
+#include "../../../src/common/threading_utils.h"  // for ParallelFor
+#include "../filesystem.h"                        // dmlc::TemporaryDirectory
 #include "../helpers.h"
 #include "dmlc/logging.h"
 #include "xgboost/json.h"
diff --git a/tests/cpp/gbm/test_gbtree.cc b/tests/cpp/gbm/test_gbtree.cc
index 270eacf21..93d0cf525 100644
--- a/tests/cpp/gbm/test_gbtree.cc
+++ b/tests/cpp/gbm/test_gbtree.cc
@@ -505,7 +505,7 @@ TEST(GBTree, PredictRange) {
     auto h_out_predt_full = out_predt->HostVector();

     ASSERT_TRUE(std::equal(h_out_predt.begin(), h_out_predt.end(), h_out_predt_full.begin()));
-
+    // Out of range.
     ASSERT_THROW(learner->InplacePredict(x, PredictionType::kValue,
                                          std::numeric_limits<float>::quiet_NaN(), &out_predt, 0, 3),
                  dmlc::Error);
diff --git a/tests/cpp/helpers.cc b/tests/cpp/helpers.cc
index 49813f1d0..27742bf6b 100644
--- a/tests/cpp/helpers.cc
+++ b/tests/cpp/helpers.cc
@@ -557,23 +557,6 @@ std::unique_ptr<DMatrix> CreateSparsePageDMatrixWithRC(
   return dmat;
 }

-gbm::GBTreeModel CreateTestModel(LearnerModelParam const* param, Context const* ctx,
-                                 size_t n_classes) {
-  gbm::GBTreeModel model(param, ctx);
-
-  for (size_t i = 0; i < n_classes; ++i) {
-    std::vector<std::unique_ptr<RegTree>> trees;
-    trees.push_back(std::unique_ptr<RegTree>(new RegTree));
-    if (i == 0) {
-      (*trees.back())[0].SetLeaf(1.5f);
-      (*trees.back()).Stat(0).sum_hess = 1.0f;
-    }
-    model.CommitModel(std::move(trees), i);
-  }
-
-  return model;
-}
-
 std::unique_ptr<GradientBooster> CreateTrainedGBM(std::string name, Args kwargs, size_t kRows,
                                                   size_t kCols,
                                                   LearnerModelParam const* learner_model_param,
diff --git a/tests/cpp/helpers.h b/tests/cpp/helpers.h
index c83544413..9d820e4b3 100644
--- a/tests/cpp/helpers.h
+++ b/tests/cpp/helpers.h
@@ -9,8 +9,10 @@
 #include 
 #include 
 #include 
+#include <xgboost/learner.h>  // for LearnerModelParam
+#include <xgboost/model.h>    // for Configurable

-#include <cstdint>   // std::int32_t
+#include <cstdint>  // std::int32_t
 #include 
 #include 
 #include 
@@ -22,7 +24,6 @@
 #include "../../src/collective/communicator-inl.h"
 #include "../../src/common/common.h"
 #include "../../src/data/array_interface.h"
-#include "../../src/gbm/gbtree_model.h"
 #include "filesystem.h"  // dmlc::TemporaryDirectory
 #include "xgboost/linalg.h"
@@ -362,9 +363,6 @@ std::unique_ptr<DMatrix> CreateSparsePageDMatrixWithRC(
     size_t n_rows, size_t n_cols, size_t page_size, bool deterministic,
     const dmlc::TemporaryDirectory& tempdir = dmlc::TemporaryDirectory());

-gbm::GBTreeModel CreateTestModel(LearnerModelParam const* param, Context const* ctx,
-                                 size_t n_classes = 1);
-
 std::unique_ptr<GradientBooster> CreateTrainedGBM(std::string name, Args kwargs, size_t kRows,
                                                   size_t kCols, LearnerModelParam const* learner_model_param,
diff --git a/tests/cpp/predictor/test_gpu_predictor.cu b/tests/cpp/predictor/test_gpu_predictor.cu
index 4a3293dbe..fecb5028a 100644
--- a/tests/cpp/predictor/test_gpu_predictor.cu
+++ b/tests/cpp/predictor/test_gpu_predictor.cu
@@ -1,5 +1,5 @@
-/*!
- * Copyright 2017-2022 XGBoost contributors
+/**
+ * Copyright 2017-2023, XGBoost contributors
  */
 #include <gtest/gtest.h>
 #include 
@@ -155,7 +155,7 @@ TEST(GPUPredictor, ShapStump) {

   std::vector<std::unique_ptr<RegTree>> trees;
   trees.push_back(std::unique_ptr<RegTree>(new RegTree));
-  model.CommitModel(std::move(trees), 0);
+  model.CommitModelGroup(std::move(trees), 0);

   auto gpu_lparam = CreateEmptyGenericParam(0);
   std::unique_ptr<Predictor> gpu_predictor = std::unique_ptr<Predictor>(
@@ -183,7 +183,7 @@ TEST(GPUPredictor, Shap) {
   std::vector<std::unique_ptr<RegTree>> trees;
   trees.push_back(std::unique_ptr<RegTree>(new RegTree));
   trees[0]->ExpandNode(0, 0, 0.5, true, 1.0, -1.0, 1.0, 0.0, 5.0, 2.0, 3.0);
-  model.CommitModel(std::move(trees), 0);
+  model.CommitModelGroup(std::move(trees), 0);

   auto gpu_lparam = CreateEmptyGenericParam(0);
   auto cpu_lparam = CreateEmptyGenericParam(-1);
diff --git a/tests/cpp/predictor/test_predictor.cc b/tests/cpp/predictor/test_predictor.cc
index d6cf33445..575a85497 100644
--- a/tests/cpp/predictor/test_predictor.cc
+++ b/tests/cpp/predictor/test_predictor.cc
@@ -209,7 +209,7 @@ void GBTreeModelForTest(gbm::GBTreeModel *model, uint32_t split_ind,
   p_tree->ExpandCategorical(0, split_ind, split_cats, true, 1.5f,
                             left_weight, right_weight,
                             3.0f, 2.2f, 7.0f, 9.0f);
-  model->CommitModel(std::move(trees), 0);
+  model->CommitModelGroup(std::move(trees), 0);
 }

 void TestCategoricalPrediction(std::string name) {
@@ -445,7 +445,7 @@ void TestVectorLeafPrediction(Context const *ctx) {
   ASSERT_TRUE(mparam.IsVectorLeaf());

   gbm::GBTreeModel model{&mparam, ctx};
-  model.CommitModel(std::move(trees), 0);
+  model.CommitModelGroup(std::move(trees), 0);

   auto run_test = [&](float expected, HostDeviceVector<float> *p_data) {
     {
diff --git a/tests/cpp/predictor/test_predictor.h b/tests/cpp/predictor/test_predictor.h
index 56c1523a1..302c6bfae 100644
--- a/tests/cpp/predictor/test_predictor.h
+++ b/tests/cpp/predictor/test_predictor.h
@@ -14,6 +14,23 @@
 #include "../helpers.h"

 namespace xgboost {
+inline gbm::GBTreeModel CreateTestModel(LearnerModelParam const* param, Context const* ctx,
+                                        size_t n_classes = 1) {
+  gbm::GBTreeModel model(param, ctx);
+
+  for (size_t i = 0; i < n_classes; ++i) {
+    std::vector<std::unique_ptr<RegTree>> trees;
+    trees.push_back(std::unique_ptr<RegTree>(new RegTree));
+    if (i == 0) {
+      (*trees.back())[0].SetLeaf(1.5f);
+      (*trees.back()).Stat(0).sum_hess = 1.0f;
+    }
+    model.CommitModelGroup(std::move(trees), i);
+  }
+
+  return model;
+}
+
 template <typename Page>
 void TestPredictionFromGradientIndex(std::string name, size_t rows, size_t cols,
                                      std::shared_ptr<DMatrix> p_hist) {
diff --git a/tests/cpp/test_serialization.cc b/tests/cpp/test_serialization.cc
index 15765f09f..731f85563 100644
--- a/tests/cpp/test_serialization.cc
+++ b/tests/cpp/test_serialization.cc
@@ -1,7 +1,10 @@
-// Copyright (c) 2019-2022 by Contributors
+/**
+ * Copyright (c) 2019-2023, XGBoost Contributors
+ */
 #include <gtest/gtest.h>
 #include 
 #include 
+#include <xgboost/feature_map.h>  // for FeatureMap
 #include 
 #include 
diff --git a/tests/cpp/tree/test_gpu_hist.cu b/tests/cpp/tree/test_gpu_hist.cu
index ed21230ed..003347c8e 100644
--- a/tests/cpp/tree/test_gpu_hist.cu
+++ b/tests/cpp/tree/test_gpu_hist.cu
@@ -246,7 +246,7 @@ void UpdateTree(HostDeviceVector<GradientPair>* gpair, DMatrix* dmat,
   std::vector<HostDeviceVector<bst_node_t>> position(1);
   hist_maker.Update(&param, gpair, dmat, common::Span<HostDeviceVector<bst_node_t>>{position},
                     {tree});

-  auto cache = linalg::VectorView<float>{preds->DeviceSpan(), {preds->Size()}, 0};
+  auto cache = linalg::MakeTensorView(&ctx, preds->DeviceSpan(), preds->Size(), 1);
   hist_maker.UpdatePredictionCache(dmat, cache);
 }
diff --git a/tests/cpp/tree/test_prediction_cache.cc b/tests/cpp/tree/test_prediction_cache.cc
index 4f5a05eb6..1877b7a35 100644
--- a/tests/cpp/tree/test_prediction_cache.cc
+++ b/tests/cpp/tree/test_prediction_cache.cc
@@ -15,15 +15,17 @@ namespace xgboost {

 class TestPredictionCache : public ::testing::Test {
   std::shared_ptr<DMatrix> Xy_;
-  size_t n_samples_{2048};
+  std::size_t n_samples_{2048};

  protected:
   void SetUp() override {
-    size_t n_features = 13;
-    Xy_ = RandomDataGenerator{n_samples_, n_features, 0}.GenerateDMatrix(true);
+    std::size_t n_features = 13;
+    bst_target_t n_targets = 3;
+    Xy_ = RandomDataGenerator{n_samples_, n_features, 0}.Targets(n_targets).GenerateDMatrix(true);
   }

-  void RunLearnerTest(std::string updater_name, float subsample, std::string grow_policy) {
+  void RunLearnerTest(std::string updater_name, float subsample, std::string const& grow_policy,
+                      std::string const& strategy) {
     std::unique_ptr<Learner> learner{Learner::Create({Xy_})};
     if (updater_name == "grow_gpu_hist") {
       // gpu_id setup
@@ -31,6 +33,7 @@ class TestPredictionCache : public ::testing::Test {
     } else {
       learner->SetParam("updater", updater_name);
     }
+    learner->SetParam("multi_strategy", strategy);
     learner->SetParam("grow_policy", grow_policy);
     learner->SetParam("subsample", std::to_string(subsample));
     learner->SetParam("nthread", "0");
@@ -62,7 +65,7 @@ class TestPredictionCache : public ::testing::Test {
     }
   }

-  void RunTest(std::string updater_name) {
+  void RunTest(std::string const& updater_name, std::string const& strategy) {
     {
       Context ctx;
       ctx.InitAllowUnknown(Args{{"nthread", "8"}});
@@ -85,28 +88,31 @@ class TestPredictionCache : public ::testing::Test {
       HostDeviceVector<float> out_prediction_cached;
       out_prediction_cached.SetDevice(ctx.gpu_id);
       out_prediction_cached.Resize(n_samples_);
-      auto cache = linalg::VectorView<float>{ctx.gpu_id == Context::kCpuId
-                                          ? 
out_prediction_cached.HostSpan() - : out_prediction_cached.DeviceSpan(), - {out_prediction_cached.Size()}, - ctx.gpu_id}; + auto cache = + linalg::MakeTensorView(&ctx, &out_prediction_cached, out_prediction_cached.Size(), 1); ASSERT_TRUE(updater->UpdatePredictionCache(Xy_.get(), cache)); } for (auto policy : {"depthwise", "lossguide"}) { for (auto subsample : {1.0f, 0.4f}) { - this->RunLearnerTest(updater_name, subsample, policy); - this->RunLearnerTest(updater_name, subsample, policy); + this->RunLearnerTest(updater_name, subsample, policy, strategy); + this->RunLearnerTest(updater_name, subsample, policy, strategy); } } } }; -TEST_F(TestPredictionCache, Approx) { this->RunTest("grow_histmaker"); } +TEST_F(TestPredictionCache, Approx) { this->RunTest("grow_histmaker", "one_output_per_tree"); } -TEST_F(TestPredictionCache, Hist) { this->RunTest("grow_quantile_histmaker"); } +TEST_F(TestPredictionCache, Hist) { + this->RunTest("grow_quantile_histmaker", "one_output_per_tree"); +} + +TEST_F(TestPredictionCache, HistMulti) { + this->RunTest("grow_quantile_histmaker", "multi_output_tree"); +} #if defined(XGBOOST_USE_CUDA) -TEST_F(TestPredictionCache, GpuHist) { this->RunTest("grow_gpu_hist"); } +TEST_F(TestPredictionCache, GpuHist) { this->RunTest("grow_gpu_hist", "one_output_per_tree"); } #endif // defined(XGBOOST_USE_CUDA) } // namespace xgboost diff --git a/tests/python/test_basic_models.py b/tests/python/test_basic_models.py index d03ce142b..516cbd6cf 100644 --- a/tests/python/test_basic_models.py +++ b/tests/python/test_basic_models.py @@ -524,7 +524,7 @@ class TestModels: booster[-1:0] # we do not accept empty slice. - with pytest.raises(ValueError): + with pytest.raises(ValueError, match="Empty slice"): booster[1:1] # stop can not be smaller than begin with pytest.raises(ValueError, match=r"Invalid.*"): @@ -615,6 +615,46 @@ class TestModels: booster = xgb.Booster(model_file=bytesarray) self.run_slice(booster, dtrain, num_parallel_tree, num_classes, num_boost_round) + def test_slice_multi(self) -> None: + from sklearn.datasets import make_classification + + num_classes = 3 + X, y = make_classification( + n_samples=1000, n_informative=5, n_classes=num_classes + ) + Xy = xgb.DMatrix(data=X, label=y) + num_parallel_tree = 4 + num_boost_round = 16 + + class ResetStrategy(xgb.callback.TrainingCallback): + def after_iteration(self, model, epoch: int, evals_log) -> bool: + model.set_param({"multi_strategy": "multi_output_tree"}) + return False + + booster = xgb.train( + { + "num_parallel_tree": num_parallel_tree, + "num_class": num_classes, + "booster": "gbtree", + "objective": "multi:softprob", + "multi_strategy": "multi_output_tree", + "tree_method": "hist", + "base_score": 0, + }, + num_boost_round=num_boost_round, + dtrain=Xy, + callbacks=[ResetStrategy()] + ) + sliced = [t for t in booster] + assert len(sliced) == 16 + + predt0 = booster.predict(Xy, output_margin=True) + predt1 = np.zeros(predt0.shape) + for t in booster: + predt1 += t.predict(Xy, output_margin=True) + + np.testing.assert_allclose(predt0, predt1, atol=1e-5) + @pytest.mark.skipif(**tm.no_pandas()) def test_feature_info(self): import pandas as pd From 401ce5cf5e8a0395d9dfb350d32cad17188d1847 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Tue, 28 Mar 2023 00:47:28 +0800 Subject: [PATCH 5/5] Run linters with the multi output demo. 
(#8966)
---
 demo/guide-python/multioutput_regression.py | 4 ++--
 tests/ci_build/lint_python.py               | 2 ++
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/demo/guide-python/multioutput_regression.py b/demo/guide-python/multioutput_regression.py
index 078ec6b7d..7450fd30a 100644
--- a/demo/guide-python/multioutput_regression.py
+++ b/demo/guide-python/multioutput_regression.py
@@ -46,7 +46,7 @@ def gen_circle() -> Tuple[np.ndarray, np.ndarray]:
     return X, y
 
 
-def rmse_model(plot_result: bool, strategy: str):
+def rmse_model(plot_result: bool, strategy: str) -> None:
     """Draw a circle with 2-dim coordinate as target variables."""
     X, y = gen_circle()
     # Train a regressor on it
@@ -120,10 +120,10 @@ if __name__ == "__main__":
     parser = argparse.ArgumentParser()
     parser.add_argument("--plot", choices=[0, 1], type=int, default=1)
     args = parser.parse_args()
+
     # Train with builtin RMSE objective
     # - One model per output.
     rmse_model(args.plot == 1, "one_output_per_tree")
-
     # - One model for all outputs; this is still a work in progress, many features are
     # missing.
     rmse_model(args.plot == 1, "multi_output_tree")
diff --git a/tests/ci_build/lint_python.py b/tests/ci_build/lint_python.py
index b7864bb50..d248e14df 100644
--- a/tests/ci_build/lint_python.py
+++ b/tests/ci_build/lint_python.py
@@ -161,6 +161,7 @@ def main(args: argparse.Namespace) -> None:
         "demo/guide-python/spark_estimator_examples.py",
         "demo/guide-python/individual_trees.py",
         "demo/guide-python/quantile_regression.py",
+        "demo/guide-python/multioutput_regression.py",
         # CI
         "tests/ci_build/lint_python.py",
         "tests/ci_build/test_r_package.py",
@@ -204,6 +205,7 @@ def main(args: argparse.Namespace) -> None:
         "demo/guide-python/feature_weights.py",
         "demo/guide-python/individual_trees.py",
         "demo/guide-python/quantile_regression.py",
+        "demo/guide-python/multioutput_regression.py",
         # tests
         "tests/python/test_dt.py",
         "tests/python/test_data_iterator.py",
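To see the multi_strategy switch exercised by this demo end to end, here is a minimal sketch against the public Python API. The parameter spellings (multi_strategy, multi_output_tree, tree_method="hist") are taken from the tests and demo in this series; the toy data, seed, and round count are illustrative assumptions only.

import numpy as np
import xgboost as xgb

# Toy multi-output regression: two targets per row.
rng = np.random.default_rng(0)
X = rng.normal(size=(256, 8))
y = np.stack([2.0 * X[:, 0], X[:, 1] - X[:, 2]], axis=1)

Xy = xgb.DMatrix(X, label=y)
# "multi_output_tree" builds a single tree for all outputs (still a work in
# progress per the demo); the default "one_output_per_tree" builds one model
# per output.
booster = xgb.train(
    {"tree_method": "hist", "multi_strategy": "multi_output_tree"},
    Xy,
    num_boost_round=8,
)
predt = booster.predict(Xy)
assert predt.shape == (256, 2)  # one column per target

With "one_output_per_tree" the same call trains one model per target instead, and predictions keep the same (n_samples, n_targets) shape.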