Make QuantileDMatrix the default for sklearn estimators. (#8220)
This commit is contained in:
@@ -726,10 +726,9 @@ def _create_quantile_dmatrix(
|
||||
if parts is None:
|
||||
msg = f"worker {worker.address} has an empty DMatrix."
|
||||
LOGGER.warning(msg)
|
||||
import cupy
|
||||
|
||||
d = QuantileDMatrix(
|
||||
cupy.zeros((0, 0)),
|
||||
numpy.empty((0, 0)),
|
||||
feature_names=feature_names,
|
||||
feature_types=feature_types,
|
||||
max_bin=max_bin,
|
||||
@@ -1544,15 +1543,21 @@ def inplace_predict( # pylint: disable=unused-argument
|
||||
|
||||
|
||||
async def _async_wrap_evaluation_matrices(
|
||||
client: Optional["distributed.Client"], **kwargs: Any
|
||||
client: Optional["distributed.Client"],
|
||||
tree_method: Optional[str],
|
||||
max_bin: Optional[int],
|
||||
**kwargs: Any,
|
||||
) -> Tuple[DaskDMatrix, Optional[List[Tuple[DaskDMatrix, str]]]]:
|
||||
"""A switch function for async environment."""
|
||||
|
||||
def _inner(**kwargs: Any) -> DaskDMatrix:
|
||||
m = DaskDMatrix(client=client, **kwargs)
|
||||
return m
|
||||
def _dispatch(ref: Optional[DaskDMatrix], **kwargs: Any) -> DaskDMatrix:
|
||||
if tree_method in ("hist", "gpu_hist"):
|
||||
return DaskQuantileDMatrix(
|
||||
client=client, ref=ref, max_bin=max_bin, **kwargs
|
||||
)
|
||||
return DaskDMatrix(client=client, **kwargs)
|
||||
|
||||
train_dmatrix, evals = _wrap_evaluation_matrices(create_dmatrix=_inner, **kwargs)
|
||||
train_dmatrix, evals = _wrap_evaluation_matrices(create_dmatrix=_dispatch, **kwargs)
|
||||
train_dmatrix = await train_dmatrix
|
||||
if evals is None:
|
||||
return train_dmatrix, evals
|
||||
@@ -1756,6 +1761,8 @@ class DaskXGBRegressor(DaskScikitLearnBase, XGBRegressorBase):
|
||||
params = self.get_xgb_params()
|
||||
dtrain, evals = await _async_wrap_evaluation_matrices(
|
||||
client=self.client,
|
||||
tree_method=self.tree_method,
|
||||
max_bin=self.max_bin,
|
||||
X=X,
|
||||
y=y,
|
||||
group=None,
|
||||
@@ -1851,6 +1858,8 @@ class DaskXGBClassifier(DaskScikitLearnBase, XGBClassifierBase):
|
||||
params = self.get_xgb_params()
|
||||
dtrain, evals = await _async_wrap_evaluation_matrices(
|
||||
self.client,
|
||||
tree_method=self.tree_method,
|
||||
max_bin=self.max_bin,
|
||||
X=X,
|
||||
y=y,
|
||||
group=None,
|
||||
@@ -2057,6 +2066,8 @@ class DaskXGBRanker(DaskScikitLearnBase, XGBRankerMixIn):
|
||||
params = self.get_xgb_params()
|
||||
dtrain, evals = await _async_wrap_evaluation_matrices(
|
||||
self.client,
|
||||
tree_method=self.tree_method,
|
||||
max_bin=self.max_bin,
|
||||
X=X,
|
||||
y=y,
|
||||
group=None,
|
||||
|
||||
@@ -38,6 +38,7 @@ from .core import (
|
||||
Booster,
|
||||
DMatrix,
|
||||
Metric,
|
||||
QuantileDMatrix,
|
||||
XGBoostError,
|
||||
_convert_ntree_limit,
|
||||
_deprecate_positional_args,
|
||||
@@ -430,7 +431,8 @@ def _wrap_evaluation_matrices(
|
||||
enable_categorical: bool,
|
||||
feature_types: Optional[FeatureTypes],
|
||||
) -> Tuple[Any, List[Tuple[Any, str]]]:
|
||||
"""Convert array_like evaluation matrices into DMatrix. Perform validation on the way."""
|
||||
"""Convert array_like evaluation matrices into DMatrix. Perform validation on the
|
||||
way."""
|
||||
train_dmatrix = create_dmatrix(
|
||||
data=X,
|
||||
label=y,
|
||||
@@ -442,6 +444,7 @@ def _wrap_evaluation_matrices(
|
||||
missing=missing,
|
||||
enable_categorical=enable_categorical,
|
||||
feature_types=feature_types,
|
||||
ref=None,
|
||||
)
|
||||
|
||||
n_validation = 0 if eval_set is None else len(eval_set)
|
||||
@@ -491,6 +494,7 @@ def _wrap_evaluation_matrices(
|
||||
missing=missing,
|
||||
enable_categorical=enable_categorical,
|
||||
feature_types=feature_types,
|
||||
ref=train_dmatrix,
|
||||
)
|
||||
evals.append(m)
|
||||
nevals = len(evals)
|
||||
@@ -904,6 +908,17 @@ class XGBModel(XGBModelBase):
|
||||
|
||||
return model, metric, params, early_stopping_rounds, callbacks
|
||||
|
||||
def _create_dmatrix(self, ref: Optional[DMatrix], **kwargs: Any) -> DMatrix:
|
||||
# Use `QuantileDMatrix` to save memory.
|
||||
if self.tree_method in ("hist", "gpu_hist"):
|
||||
try:
|
||||
return QuantileDMatrix(
|
||||
**kwargs, ref=ref, nthread=self.n_jobs, max_bin=self.max_bin
|
||||
)
|
||||
except TypeError: # `QuantileDMatrix` supports lesser types than DMatrix
|
||||
pass
|
||||
return DMatrix(**kwargs, nthread=self.n_jobs)
|
||||
|
||||
def _set_evaluation_result(self, evals_result: TrainingCallback.EvalsLog) -> None:
|
||||
if evals_result:
|
||||
self.evals_result_ = cast(Dict[str, Dict[str, List[float]]], evals_result)
|
||||
@@ -996,7 +1011,7 @@ class XGBModel(XGBModelBase):
|
||||
base_margin_eval_set=base_margin_eval_set,
|
||||
eval_group=None,
|
||||
eval_qid=None,
|
||||
create_dmatrix=lambda **kwargs: DMatrix(nthread=self.n_jobs, **kwargs),
|
||||
create_dmatrix=self._create_dmatrix,
|
||||
enable_categorical=self.enable_categorical,
|
||||
feature_types=self.feature_types,
|
||||
)
|
||||
@@ -1479,7 +1494,7 @@ class XGBClassifier(XGBModel, XGBClassifierBase):
|
||||
base_margin_eval_set=base_margin_eval_set,
|
||||
eval_group=None,
|
||||
eval_qid=None,
|
||||
create_dmatrix=lambda **kwargs: DMatrix(nthread=self.n_jobs, **kwargs),
|
||||
create_dmatrix=self._create_dmatrix,
|
||||
enable_categorical=self.enable_categorical,
|
||||
feature_types=self.feature_types,
|
||||
)
|
||||
@@ -1930,7 +1945,7 @@ class XGBRanker(XGBModel, XGBRankerMixIn):
|
||||
base_margin_eval_set=base_margin_eval_set,
|
||||
eval_group=eval_group,
|
||||
eval_qid=eval_qid,
|
||||
create_dmatrix=lambda **kwargs: DMatrix(nthread=self.n_jobs, **kwargs),
|
||||
create_dmatrix=self._create_dmatrix,
|
||||
enable_categorical=self.enable_categorical,
|
||||
feature_types=self.feature_types,
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user