Make QuantileDMatrix default to sklearn esitmators. (#8220)

This commit is contained in:
Jiaming Yuan
2022-09-13 13:52:19 +08:00
committed by GitHub
parent a2686543a9
commit bdf265076d
6 changed files with 91 additions and 39 deletions

View File

@@ -726,10 +726,9 @@ def _create_quantile_dmatrix(
if parts is None:
msg = f"worker {worker.address} has an empty DMatrix."
LOGGER.warning(msg)
import cupy
d = QuantileDMatrix(
cupy.zeros((0, 0)),
numpy.empty((0, 0)),
feature_names=feature_names,
feature_types=feature_types,
max_bin=max_bin,
@@ -1544,15 +1543,21 @@ def inplace_predict( # pylint: disable=unused-argument
async def _async_wrap_evaluation_matrices(
client: Optional["distributed.Client"], **kwargs: Any
client: Optional["distributed.Client"],
tree_method: Optional[str],
max_bin: Optional[int],
**kwargs: Any,
) -> Tuple[DaskDMatrix, Optional[List[Tuple[DaskDMatrix, str]]]]:
"""A switch function for async environment."""
def _inner(**kwargs: Any) -> DaskDMatrix:
m = DaskDMatrix(client=client, **kwargs)
return m
def _dispatch(ref: Optional[DaskDMatrix], **kwargs: Any) -> DaskDMatrix:
if tree_method in ("hist", "gpu_hist"):
return DaskQuantileDMatrix(
client=client, ref=ref, max_bin=max_bin, **kwargs
)
return DaskDMatrix(client=client, **kwargs)
train_dmatrix, evals = _wrap_evaluation_matrices(create_dmatrix=_inner, **kwargs)
train_dmatrix, evals = _wrap_evaluation_matrices(create_dmatrix=_dispatch, **kwargs)
train_dmatrix = await train_dmatrix
if evals is None:
return train_dmatrix, evals
@@ -1756,6 +1761,8 @@ class DaskXGBRegressor(DaskScikitLearnBase, XGBRegressorBase):
params = self.get_xgb_params()
dtrain, evals = await _async_wrap_evaluation_matrices(
client=self.client,
tree_method=self.tree_method,
max_bin=self.max_bin,
X=X,
y=y,
group=None,
@@ -1851,6 +1858,8 @@ class DaskXGBClassifier(DaskScikitLearnBase, XGBClassifierBase):
params = self.get_xgb_params()
dtrain, evals = await _async_wrap_evaluation_matrices(
self.client,
tree_method=self.tree_method,
max_bin=self.max_bin,
X=X,
y=y,
group=None,
@@ -2057,6 +2066,8 @@ class DaskXGBRanker(DaskScikitLearnBase, XGBRankerMixIn):
params = self.get_xgb_params()
dtrain, evals = await _async_wrap_evaluation_matrices(
self.client,
tree_method=self.tree_method,
max_bin=self.max_bin,
X=X,
y=y,
group=None,

View File

@@ -38,6 +38,7 @@ from .core import (
Booster,
DMatrix,
Metric,
QuantileDMatrix,
XGBoostError,
_convert_ntree_limit,
_deprecate_positional_args,
@@ -430,7 +431,8 @@ def _wrap_evaluation_matrices(
enable_categorical: bool,
feature_types: Optional[FeatureTypes],
) -> Tuple[Any, List[Tuple[Any, str]]]:
"""Convert array_like evaluation matrices into DMatrix. Perform validation on the way."""
"""Convert array_like evaluation matrices into DMatrix. Perform validation on the
way."""
train_dmatrix = create_dmatrix(
data=X,
label=y,
@@ -442,6 +444,7 @@ def _wrap_evaluation_matrices(
missing=missing,
enable_categorical=enable_categorical,
feature_types=feature_types,
ref=None,
)
n_validation = 0 if eval_set is None else len(eval_set)
@@ -491,6 +494,7 @@ def _wrap_evaluation_matrices(
missing=missing,
enable_categorical=enable_categorical,
feature_types=feature_types,
ref=train_dmatrix,
)
evals.append(m)
nevals = len(evals)
@@ -904,6 +908,17 @@ class XGBModel(XGBModelBase):
return model, metric, params, early_stopping_rounds, callbacks
def _create_dmatrix(self, ref: Optional[DMatrix], **kwargs: Any) -> DMatrix:
# Use `QuantileDMatrix` to save memory.
if self.tree_method in ("hist", "gpu_hist"):
try:
return QuantileDMatrix(
**kwargs, ref=ref, nthread=self.n_jobs, max_bin=self.max_bin
)
except TypeError: # `QuantileDMatrix` supports lesser types than DMatrix
pass
return DMatrix(**kwargs, nthread=self.n_jobs)
def _set_evaluation_result(self, evals_result: TrainingCallback.EvalsLog) -> None:
if evals_result:
self.evals_result_ = cast(Dict[str, Dict[str, List[float]]], evals_result)
@@ -996,7 +1011,7 @@ class XGBModel(XGBModelBase):
base_margin_eval_set=base_margin_eval_set,
eval_group=None,
eval_qid=None,
create_dmatrix=lambda **kwargs: DMatrix(nthread=self.n_jobs, **kwargs),
create_dmatrix=self._create_dmatrix,
enable_categorical=self.enable_categorical,
feature_types=self.feature_types,
)
@@ -1479,7 +1494,7 @@ class XGBClassifier(XGBModel, XGBClassifierBase):
base_margin_eval_set=base_margin_eval_set,
eval_group=None,
eval_qid=None,
create_dmatrix=lambda **kwargs: DMatrix(nthread=self.n_jobs, **kwargs),
create_dmatrix=self._create_dmatrix,
enable_categorical=self.enable_categorical,
feature_types=self.feature_types,
)
@@ -1930,7 +1945,7 @@ class XGBRanker(XGBModel, XGBRankerMixIn):
base_margin_eval_set=base_margin_eval_set,
eval_group=eval_group,
eval_qid=eval_qid,
create_dmatrix=lambda **kwargs: DMatrix(nthread=self.n_jobs, **kwargs),
create_dmatrix=self._create_dmatrix,
enable_categorical=self.enable_categorical,
feature_types=self.feature_types,
)