[CI] Fix PyLint errors. (#10837)
parent 982ee34658
commit 68a8865bc5
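
One pattern runs through this commit: arguments that PyLint flags as excessively positional become keyword-only behind a bare `*` marker, public entry points additionally gain the `@_deprecate_positional_args` decorator from `xgboost.core`, and every call site switches to keyword form. A minimal sketch of how such a decorator can be implemented (an illustration written for this note, not the exact helper shipped in `xgboost.core`):

    import functools
    import inspect
    import warnings
    from typing import Any, Callable, TypeVar

    F = TypeVar("F", bound=Callable[..., Any])


    def deprecate_positional_args(fn: F) -> F:
        """Warn when keyword-only arguments of ``fn`` are passed positionally.

        Sketch only; the real ``_deprecate_positional_args`` differs in details.
        """
        sig = inspect.signature(fn)
        # Parameters that may still be passed by position (e.g. ``self``, ``data``).
        allowed = [
            name
            for name, p in sig.parameters.items()
            if p.kind in (p.POSITIONAL_ONLY, p.POSITIONAL_OR_KEYWORD)
        ]
        kwonly = [name for name, p in sig.parameters.items() if p.kind == p.KEYWORD_ONLY]

        @functools.wraps(fn)
        def wrapper(*args: Any, **kwargs: Any) -> Any:
            if len(args) > len(allowed):
                # Remap surplus positional values onto keyword-only names in
                # declaration order, warn, and forward them as keywords.
                extra = args[len(allowed):]
                names = kwonly[: len(extra)]
                warnings.warn(
                    f"Pass {', '.join(names)} as keyword arguments; passing them "
                    "positionally is deprecated.",
                    FutureWarning,
                )
                kwargs.update(zip(names, extra))
                args = args[: len(allowed)]
            return fn(*args, **kwargs)

        return wrapper  # type: ignore[return-value]

Under this sketch a legacy call such as `EarlyStopping(3)` keeps working but emits a `FutureWarning`, while `EarlyStopping(rounds=3)` stays silent. Internal helpers carry no compatibility burden, so most hunks below add only the bare `*,` marker and update the call sites.
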
@@ -23,7 +23,13 @@ from typing import (
 import numpy
 
 from . import collective
-from .core import Booster, DMatrix, XGBoostError, _parse_eval_str
+from .core import (
+    Booster,
+    DMatrix,
+    XGBoostError,
+    _deprecate_positional_args,
+    _parse_eval_str,
+)
 
 __all__ = [
     "TrainingCallback",
@@ -346,8 +352,10 @@ class EarlyStopping(TrainingCallback):
     """
 
     # pylint: disable=too-many-arguments
+    @_deprecate_positional_args
     def __init__(
         self,
+        *,
         rounds: int,
         metric_name: Optional[str] = None,
         data_name: Optional[str] = None,
@@ -375,7 +383,7 @@ class EarlyStopping(TrainingCallback):
         return model
 
     def _update_rounds(
-        self, score: _Score, name: str, metric: str, model: _Model, epoch: int
+        self, *, score: _Score, name: str, metric: str, model: _Model, epoch: int
     ) -> bool:
         def get_s(value: _Score) -> float:
             """get score if it's cross validation history."""
@@ -471,7 +479,9 @@ class EarlyStopping(TrainingCallback):
 
         # The latest score
         score = data_log[metric_name][-1]
-        return self._update_rounds(score, data_name, metric_name, model, epoch)
+        return self._update_rounds(
+            score=score, name=data_name, metric=metric_name, model=model, epoch=epoch
+        )
 
     def after_training(self, model: _Model) -> _Model:
         if not self.save_best:
@@ -907,7 +907,7 @@ class DMatrix:  # pylint: disable=too-many-instance-attributes,too-many-public-m
             return
 
         handle, feature_names, feature_types = dispatch_data_backend(
-            data,
+            data=data,
             missing=self.missing,
             threads=self.nthread,
             feature_names=feature_names,
@@ -1697,6 +1697,7 @@ class ExtMemQuantileDMatrix(DMatrix):
     def __init__(  # pylint: disable=super-init-not-called
         self,
         data: DataIter,
+        *,
         missing: Optional[float] = None,
         nthread: Optional[int] = None,
         max_bin: Optional[int] = None,
@@ -2355,9 +2356,11 @@ class Booster:
         return self.eval_set([(data, name)], iteration)
 
     # pylint: disable=too-many-function-args
+    @_deprecate_positional_args
     def predict(
         self,
         data: DMatrix,
+        *,
         output_margin: bool = False,
         pred_leaf: bool = False,
         pred_contribs: bool = False,
@@ -2490,9 +2493,11 @@ class Booster:
         return _prediction_output(shape, dims, preds, False)
 
     # pylint: disable=too-many-statements
+    @_deprecate_positional_args
     def inplace_predict(
         self,
         data: DataType,
+        *,
         iteration_range: IterationRange = (0, 0),
         predict_type: str = "value",
         missing: float = np.nan,
@@ -339,8 +339,8 @@ class DaskDMatrix:
 
         self._init = client.sync(
             self._map_local_data,
-            client,
-            data,
+            client=client,
+            data=data,
             label=label,
             weights=weight,
             base_margin=base_margin,
@@ -355,6 +355,7 @@ class DaskDMatrix:
 
     async def _map_local_data(
         self,
+        *,
         client: "distributed.Client",
         data: _DataT,
         label: Optional[_DaskCollection] = None,
@@ -589,6 +590,7 @@ class DaskPartitionIter(DataIter):  # pylint: disable=R0902
         self,
         data: List[Any],
         label: Optional[List[Any]] = None,
+        *,
         weight: Optional[List[Any]] = None,
         base_margin: Optional[List[Any]] = None,
         qid: Optional[List[Any]] = None,
@@ -712,6 +714,7 @@ class DaskQuantileDMatrix(DaskDMatrix):
 
 
 def _create_quantile_dmatrix(
+    *,
     feature_names: Optional[FeatureNames],
     feature_types: Optional[Union[Any, List[Any]]],
     feature_weights: Optional[Any],
@@ -757,6 +760,7 @@ def _create_quantile_dmatrix(
 
 
 def _create_dmatrix(
+    *,
     feature_names: Optional[FeatureNames],
     feature_types: Optional[Union[Any, List[Any]]],
     feature_weights: Optional[Any],
@@ -927,6 +931,7 @@ def _get_dmatrices(
 
 
 async def _train_async(
+    *,
     client: "distributed.Client",
     global_config: Dict[str, Any],
    dconfig: Optional[Dict[str, Any]],
@@ -947,7 +952,7 @@ async def _train_async(
     _rabit_args = await _get_rabit_args(len(workers), dconfig, client)
     _check_distributed_params(params)
 
-    def dispatched_train(
+    def dispatched_train(  # pylint: disable=too-many-positional-arguments
         parameters: Dict,
         rabit_args: Dict[str, Union[str, int]],
         train_id: int,
@@ -1115,6 +1120,7 @@ def _maybe_dataframe(
 
 
 async def _direct_predict_impl(  # pylint: disable=too-many-branches
+    *,
     mapped_predict: Callable,
     booster: "distributed.Future",
     data: _DataT,
@@ -1249,6 +1255,7 @@ async def _predict_async(
     global_config: Dict[str, Any],
     model: Union[Booster, Dict, "distributed.Future"],
     data: _DataT,
+    *,
     output_margin: bool,
     missing: float,
     pred_leaf: bool,
@@ -1304,7 +1311,12 @@ async def _predict_async(
             )
         )
         return await _direct_predict_impl(
-            mapped_predict, _booster, data, None, _output_shape, meta
+            mapped_predict=mapped_predict,
+            booster=_booster,
+            data=data,
+            base_margin=None,
+            output_shape=_output_shape,
+            meta=meta,
         )
 
     output_shape, _ = await client.compute(
@@ -1392,10 +1404,12 @@ async def _predict_async(
     return predictions
 
 
+@_deprecate_positional_args
 def predict(  # pylint: disable=unused-argument
     client: Optional["distributed.Client"],
     model: Union[TrainReturnT, Booster, "distributed.Future"],
     data: Union[DaskDMatrix, _DataT],
+    *,
     output_margin: bool = False,
     missing: float = numpy.nan,
     pred_leaf: bool = False,
@@ -1447,6 +1461,7 @@ def predict(  # pylint: disable=unused-argument
 
 
 async def _inplace_predict_async(  # pylint: disable=too-many-branches
+    *,
     client: "distributed.Client",
     global_config: Dict[str, Any],
     model: Union[Booster, Dict, "distributed.Future"],
@@ -1501,14 +1516,21 @@ async def _inplace_predict_async(  # pylint: disable=too-many-branches
             )
         )
         return await _direct_predict_impl(
-            mapped_predict, booster, data, base_margin, shape, meta
+            mapped_predict=mapped_predict,
+            booster=booster,
+            data=data,
+            base_margin=base_margin,
+            output_shape=shape,
+            meta=meta,
         )
 
 
+@_deprecate_positional_args
 def inplace_predict(  # pylint: disable=unused-argument
     client: Optional["distributed.Client"],
     model: Union[TrainReturnT, Booster, "distributed.Future"],
     data: _DataT,
+    *,
     iteration_range: IterationRange = (0, 0),
     predict_type: str = "value",
     missing: float = numpy.nan,
@@ -1615,6 +1637,7 @@ class DaskScikitLearnBase(XGBModel):
     async def _predict_async(
         self,
         data: _DataT,
+        *,
         output_margin: bool,
         validate_features: bool,
         base_margin: Optional[_DaskCollection],
@@ -1652,9 +1675,11 @@ class DaskScikitLearnBase(XGBModel):
         )
         return predts
 
+    @_deprecate_positional_args
     def predict(
         self,
         X: _DataT,
+        *,
         output_margin: bool = False,
         validate_features: bool = True,
         base_margin: Optional[_DaskCollection] = None,
@@ -1765,6 +1790,7 @@ class DaskXGBRegressor(DaskScikitLearnBase, XGBRegressorBase):
         self,
         X: _DataT,
         y: _DaskCollection,
+        *,
         sample_weight: Optional[_DaskCollection],
         base_margin: Optional[_DaskCollection],
         eval_set: Optional[Sequence[Tuple[_DaskCollection, _DaskCollection]]],
@@ -1855,6 +1881,7 @@ class DaskXGBClassifier(DaskScikitLearnBase, XGBClassifierBase):
         self,
         X: _DataT,
         y: _DaskCollection,
+        *,
         sample_weight: Optional[_DaskCollection],
         base_margin: Optional[_DaskCollection],
         eval_set: Optional[Sequence[Tuple[_DaskCollection, _DaskCollection]]],
@@ -1999,13 +2026,18 @@ class DaskXGBClassifier(DaskScikitLearnBase, XGBClassifierBase):
     async def _predict_async(
         self,
         data: _DataT,
+        *,
         output_margin: bool,
         validate_features: bool,
         base_margin: Optional[_DaskCollection],
         iteration_range: Optional[IterationRange],
     ) -> _DaskCollection:
         pred_probs = await super()._predict_async(
-            data, output_margin, validate_features, base_margin, iteration_range
+            data,
+            output_margin=output_margin,
+            validate_features=validate_features,
+            base_margin=base_margin,
+            iteration_range=iteration_range,
         )
         if output_margin:
             return pred_probs
@@ -2049,6 +2081,7 @@ class DaskXGBRanker(DaskScikitLearnBase, XGBRankerMixIn):
         self,
         X: _DataT,
         y: _DaskCollection,
+        *,
         group: Optional[_DaskCollection],
         qid: Optional[_DaskCollection],
         sample_weight: Optional[_DaskCollection],
@@ -128,6 +128,7 @@ def transform_scipy_sparse(data: DataType, is_csr: bool) -> DataType:
 
 
 def _from_scipy_csr(
+    *,
     data: DataType,
     missing: FloatCompatible,
     nthread: int,
@@ -176,6 +177,7 @@ def is_scipy_csc(data: DataType) -> bool:
 
 
 def _from_scipy_csc(
+    *,
     data: DataType,
     missing: FloatCompatible,
     nthread: int,
@@ -251,6 +253,7 @@ def _maybe_np_slice(data: DataType, dtype: Optional[NumpyDType]) -> np.ndarray:
 
 
 def _from_numpy_array(
+    *,
     data: np.ndarray,
     missing: FloatCompatible,
     nthread: int,
@@ -639,6 +642,7 @@ def _meta_from_pandas_df(
 
 
 def _from_pandas_df(
+    *,
     data: DataFrame,
     enable_categorical: bool,
     missing: FloatCompatible,
@@ -698,6 +702,7 @@ def _is_modin_series(data: DataType) -> bool:
 
 
 def _from_pandas_series(
+    *,
     data: DataType,
     missing: FloatCompatible,
     nthread: int,
@@ -712,11 +717,11 @@ def _from_pandas_series(
     if enable_categorical and is_pd_cat_dtype(data.dtype):
         data = data.cat.codes
     return _from_numpy_array(
-        data.values.reshape(data.shape[0], 1).astype("float"),
-        missing,
-        nthread,
-        feature_names,
-        feature_types,
+        data=data.values.reshape(data.shape[0], 1).astype("float"),
+        missing=missing,
+        nthread=nthread,
+        feature_names=feature_names,
+        feature_types=feature_types,
     )
 
 
@@ -768,6 +773,7 @@ def _transform_dt_df(
 
 
 def _from_dt_df(
+    *,
     data: DataType,
     missing: Optional[FloatCompatible],
     nthread: int,
@@ -778,7 +784,11 @@ def _from_dt_df(
     if enable_categorical:
         raise ValueError("categorical data in datatable is not supported yet.")
     data, feature_names, feature_types = _transform_dt_df(
-        data, feature_names, feature_types, None, None
+        data=data,
+        feature_names=feature_names,
+        feature_types=feature_types,
+        meta=None,
+        meta_type=None,
     )
 
     ptrs = (ctypes.c_void_p * data.ncols)()
@@ -968,6 +978,7 @@ def _transform_cudf_df(
 
 
 def _from_cudf_df(
+    *,
     data: DataType,
     missing: FloatCompatible,
     nthread: int,
@@ -1095,6 +1106,7 @@ def _is_list(data: DataType) -> TypeGuard[list]:
 
 
 def _from_list(
+    *,
     data: Sequence,
     missing: FloatCompatible,
     n_threads: int,
@@ -1105,7 +1117,12 @@ def _from_list(
     array = np.array(data)
     _check_data_shape(data)
     return _from_numpy_array(
-        array, missing, n_threads, feature_names, feature_types, data_split_mode
+        data=array,
+        missing=missing,
+        nthread=n_threads,
+        feature_names=feature_names,
+        feature_types=feature_types,
+        data_split_mode=data_split_mode,
     )
 
 
@@ -1114,6 +1131,7 @@ def _is_tuple(data: DataType) -> TypeGuard[tuple]:
 
 
 def _from_tuple(
+    *,
     data: Sequence,
     missing: FloatCompatible,
     n_threads: int,
@@ -1122,7 +1140,12 @@ def _from_tuple(
     data_split_mode: DataSplitMode = DataSplitMode.ROW,
 ) -> DispatchedDataBackendReturnType:
     return _from_list(
-        data, missing, n_threads, feature_names, feature_types, data_split_mode
+        data=data,
+        missing=missing,
+        n_threads=n_threads,
+        feature_names=feature_names,
+        feature_types=feature_types,
+        data_split_mode=data_split_mode,
     )
 
 
@@ -1153,6 +1176,7 @@ def _convert_unknown_data(data: DataType) -> DataType:
 
 
 def dispatch_data_backend(
+    *,
     data: DataType,
     missing: FloatCompatible,  # Or Optional[Float]
     threads: int,
@@ -1166,34 +1190,59 @@ def dispatch_data_backend(
         _check_data_shape(data)
     if is_scipy_csr(data):
         return _from_scipy_csr(
-            data, missing, threads, feature_names, feature_types, data_split_mode
+            data=data,
+            missing=missing,
+            nthread=threads,
+            feature_names=feature_names,
+            feature_types=feature_types,
+            data_split_mode=data_split_mode,
         )
     if is_scipy_csc(data):
         return _from_scipy_csc(
-            data, missing, threads, feature_names, feature_types, data_split_mode
+            data=data,
+            missing=missing,
+            nthread=threads,
+            feature_names=feature_names,
+            feature_types=feature_types,
+            data_split_mode=data_split_mode,
         )
     if is_scipy_coo(data):
         return _from_scipy_csr(
-            data.tocsr(),
-            missing,
-            threads,
-            feature_names,
-            feature_types,
-            data_split_mode,
+            data=data.tocsr(),
+            missing=missing,
+            nthread=threads,
+            feature_names=feature_names,
+            feature_types=feature_types,
+            data_split_mode=data_split_mode,
         )
     if _is_np_array_like(data):
         return _from_numpy_array(
-            data, missing, threads, feature_names, feature_types, data_split_mode
+            data=data,
+            missing=missing,
+            nthread=threads,
+            feature_names=feature_names,
+            feature_types=feature_types,
+            data_split_mode=data_split_mode,
        )
     if _is_uri(data):
         return _from_uri(data, missing, feature_names, feature_types, data_split_mode)
     if _is_list(data):
         return _from_list(
-            data, missing, threads, feature_names, feature_types, data_split_mode
+            data=data,
+            missing=missing,
+            n_threads=threads,
+            feature_names=feature_names,
+            feature_types=feature_types,
+            data_split_mode=data_split_mode,
         )
     if _is_tuple(data):
         return _from_tuple(
-            data, missing, threads, feature_names, feature_types, data_split_mode
+            data=data,
+            missing=missing,
+            n_threads=threads,
+            feature_names=feature_names,
+            feature_types=feature_types,
+            data_split_mode=data_split_mode,
        )
     if _is_arrow(data):
         data = _arrow_transform(data)
@@ -1203,17 +1252,22 @@ def dispatch_data_backend(
         data = pd.DataFrame(data)
     if _is_pandas_df(data):
         return _from_pandas_df(
-            data,
-            enable_categorical,
-            missing,
-            threads,
-            feature_names,
-            feature_types,
-            data_split_mode,
+            data=data,
+            enable_categorical=enable_categorical,
+            missing=missing,
+            nthread=threads,
+            feature_names=feature_names,
+            feature_types=feature_types,
+            data_split_mode=data_split_mode,
         )
     if _is_cudf_df(data) or _is_cudf_ser(data):
         return _from_cudf_df(
-            data, missing, threads, feature_names, feature_types, enable_categorical
+            data=data,
+            missing=missing,
+            nthread=threads,
+            feature_names=feature_names,
+            feature_types=feature_types,
+            enable_categorical=enable_categorical,
        )
     if _is_cupy_alike(data):
         return _from_cupy_array(data, missing, threads, feature_names, feature_types)
@@ -1226,24 +1280,49 @@ def dispatch_data_backend(
     if _is_dt_df(data):
         _warn_unused_missing(data, missing)
         return _from_dt_df(
-            data, missing, threads, feature_names, feature_types, enable_categorical
+            data=data,
+            missing=missing,
+            nthread=threads,
+            feature_names=feature_names,
+            feature_types=feature_types,
+            enable_categorical=enable_categorical,
         )
     if _is_modin_df(data):
         return _from_pandas_df(
-            data, enable_categorical, missing, threads, feature_names, feature_types
+            data=data,
+            enable_categorical=enable_categorical,
+            missing=missing,
+            nthread=threads,
+            feature_names=feature_names,
+            feature_types=feature_types,
        )
     if _is_modin_series(data):
         return _from_pandas_series(
-            data, missing, threads, enable_categorical, feature_names, feature_types
+            data=data,
+            missing=missing,
+            nthread=threads,
+            enable_categorical=enable_categorical,
+            feature_names=feature_names,
+            feature_types=feature_types,
         )
     if _has_array_protocol(data):
         array = np.asarray(data)
-        return _from_numpy_array(array, missing, threads, feature_names, feature_types)
+        return _from_numpy_array(
+            data=array,
+            missing=missing,
+            nthread=threads,
+            feature_names=feature_names,
+            feature_types=feature_types,
+        )
 
     converted = _convert_unknown_data(data)
     if converted is not None:
         return _from_scipy_csr(
-            converted, missing, threads, feature_names, feature_types
+            data=converted,
+            missing=missing,
+            nthread=threads,
+            feature_names=feature_names,
+            feature_types=feature_types,
        )
 
     raise TypeError("Not supported type for data." + str(type(data)))
@@ -1313,7 +1392,9 @@ def _meta_from_cupy_array(data: DataType, field: str, handle: ctypes.c_void_p) -
 def _meta_from_dt(
     data: DataType, field: str, dtype: Optional[NumpyDType], handle: ctypes.c_void_p
 ) -> None:
-    data, _, _ = _transform_dt_df(data, None, None, field, dtype)
+    data, _, _ = _transform_dt_df(
+        data=data, feature_names=None, feature_types=None, meta=field, meta_type=dtype
+    )
     _meta_from_numpy(data, field, dtype, handle)
 
 
@@ -4,7 +4,7 @@ import ctypes
 from threading import Thread
 from typing import Any, Dict, Optional
 
-from .core import _LIB, _check_call, make_jcargs
+from .core import _LIB, _check_call, _deprecate_positional_args, make_jcargs
 from .tracker import RabitTracker
 
 
@@ -34,10 +34,12 @@ class FederatedTracker(RabitTracker):
 
     """
 
+    @_deprecate_positional_args
     def __init__(  # pylint: disable=R0913, W0231
         self,
         n_workers: int,
         port: int,
+        *,
         secure: bool,
         server_key_path: Optional[str] = None,
         server_cert_path: Optional[str] = None,
@@ -59,9 +61,11 @@ class FederatedTracker(RabitTracker):
         self.handle = handle
 
 
+@_deprecate_positional_args
 def run_federated_server(  # pylint: disable=too-many-arguments
     n_workers: int,
     port: int,
+    *,
     server_key_path: Optional[str] = None,
     server_cert_path: Optional[str] = None,
     client_cert_path: Optional[str] = None,
@@ -8,15 +8,17 @@ from typing import Any, Optional, Union
 import numpy as np
 
 from ._typing import PathLike
-from .core import Booster
+from .core import Booster, _deprecate_positional_args
 from .sklearn import XGBModel
 
 Axes = Any  # real type is matplotlib.axes.Axes
 GraphvizSource = Any  # real type is graphviz.Source
 
 
+@_deprecate_positional_args
 def plot_importance(
     booster: Union[XGBModel, Booster, dict],
+    *,
     ax: Optional[Axes] = None,
     height: float = 0.2,
     xlim: Optional[tuple] = None,
@@ -146,8 +148,10 @@ def plot_importance(
     return ax
 
 
+@_deprecate_positional_args
 def to_graphviz(
     booster: Union[Booster, XGBModel],
+    *,
     fmap: PathLike = "",
     num_trees: int = 0,
     rankdir: Optional[str] = None,
@@ -582,6 +582,7 @@ Parameters
 
 
 def _wrap_evaluation_matrices(
+    *,
     missing: float,
     X: Any,
     y: Any,
@@ -696,8 +697,10 @@ DEFAULT_N_ESTIMATORS = 100
 )
 class XGBModel(XGBModelBase):
     # pylint: disable=too-many-arguments, too-many-instance-attributes, missing-docstring
+    @_deprecate_positional_args
     def __init__(
         self,
+        *,
         max_depth: Optional[int] = None,
         max_leaves: Optional[int] = None,
         max_bin: Optional[int] = None,
@@ -1174,9 +1177,11 @@ class XGBModel(XGBModelBase):
             iteration_range = (0, 0)
         return iteration_range
 
+    @_deprecate_positional_args
     def predict(
         self,
         X: ArrayLike,
+        *,
         output_margin: bool = False,
         validate_features: bool = True,
         base_margin: Optional[ArrayLike] = None,
@@ -1587,9 +1592,11 @@ class XGBClassifier(XGBModel, XGBClassifierBase):
         "Fit gradient boosting model", "Fit gradient boosting classifier", 1
     )
 
+    @_deprecate_positional_args
     def predict(
         self,
         X: ArrayLike,
+        *,
         output_margin: bool = False,
         validate_features: bool = True,
         base_margin: Optional[ArrayLike] = None,
@@ -2070,9 +2077,11 @@ class XGBRanker(XGBModel, XGBRankerMixIn):
         self._set_evaluation_result(evals_result)
         return self
 
+    @_deprecate_positional_args
     def predict(
         self,
         X: ArrayLike,
+        *,
         output_margin: bool = False,
         validate_features: bool = True,
         base_margin: Optional[ArrayLike] = None,
@@ -2081,9 +2090,9 @@ class XGBRanker(XGBModel, XGBRankerMixIn):
         X, _ = _get_qid(X, None)
         return super().predict(
             X,
-            output_margin,
-            validate_features,
-            base_margin,
+            output_margin=output_margin,
+            validate_features=validate_features,
+            base_margin=base_margin,
             iteration_range=iteration_range,
         )
 
@@ -1072,11 +1072,11 @@ class _SparkXGBEstimator(Estimator, _SparkXGBParams, MLReadable, MLWritable):
             with CommunicatorContext(context, **_rabit_args):
                 with xgboost.config_context(verbosity=verbosity):
                     dtrain, dvalid = create_dmatrix_from_partitions(
-                        pandas_df_iter,
-                        feature_prop.features_cols_names,
-                        dev_ordinal,
-                        use_qdm,
-                        dmatrix_kwargs,
+                        iterator=pandas_df_iter,
+                        feature_cols=feature_prop.features_cols_names,
+                        dev_ordinal=dev_ordinal,
+                        use_qdm=use_qdm,
+                        kwargs=dmatrix_kwargs,
                         enable_sparse_data_optim=feature_prop.enable_sparse_data_optim,
                         has_validation_col=feature_prop.has_validation_col,
                     )
@@ -171,6 +171,7 @@ def make_qdm(
 
 
 def create_dmatrix_from_partitions(  # pylint: disable=too-many-arguments
+    *,
     iterator: Iterator[pd.DataFrame],
     feature_cols: Optional[Sequence[str]],
     dev_ordinal: Optional[int],
@@ -224,6 +224,7 @@ class IteratorForTest(xgb.core.DataIter):
         X: Sequence,
         y: Sequence,
         w: Optional[Sequence],
+        *,
         cache: Optional[str],
         on_host: bool = False,
     ) -> None:
@@ -379,6 +380,7 @@ def make_categorical(
     n_samples: int,
     n_features: int,
     n_categories: int,
+    *,
     onehot: bool,
     sparsity: float = 0.0,
     cat_ratio: float = 1.0,
@@ -487,7 +489,9 @@ def _cat_sampled_from() -> strategies.SearchStrategy:
         sparsity = args[3]
         return TestDataset(
             f"{n_samples}x{n_features}-{n_cats}-{sparsity}",
-            lambda: make_categorical(n_samples, n_features, n_cats, False, sparsity),
+            lambda: make_categorical(
+                n_samples, n_features, n_cats, onehot=False, sparsity=sparsity
+            ),
             "reg:squarederror",
             "rmse",
         )
@@ -22,7 +22,7 @@ def run_mixed_sparsity(device: str) -> None:
 
         X = [cp.array(batch) for batch in X]
 
-    it = tm.IteratorForTest(X, y, None, None, on_host=False)
+    it = tm.IteratorForTest(X, y, None, cache=None, on_host=False)
     Xy_0 = xgboost.QuantileDMatrix(it)
 
     X_1, y_1 = tm.make_sparse_regression(256, 16, 0.1, True)
@@ -52,6 +52,7 @@ def validate_data_initialization(
 
 # pylint: disable=too-many-arguments,too-many-locals
 def get_feature_weights(
+    *,
     X: ArrayLike,
     y: ArrayLike,
     fw: np.ndarray,
@@ -291,7 +291,9 @@ def check_get_quantile_cut_device(tree_method: str, use_cupy: bool) -> None:
 
     # categorical
     n_categories = 32
-    X, y = tm.make_categorical(n_samples, n_features, n_categories, False, sparsity=0.8)
+    X, y = tm.make_categorical(
+        n_samples, n_features, n_categories, onehot=False, sparsity=0.8
+    )
     if use_cupy:
         import cudf  # pylint: disable=import-error
         import cupy as cp  # pylint: disable=import-error
@@ -310,7 +312,7 @@ def check_get_quantile_cut_device(tree_method: str, use_cupy: bool) -> None:
 
     # mixed
     X, y = tm.make_categorical(
-        n_samples, n_features, n_categories, False, sparsity=0.8, cat_ratio=0.5
+        n_samples, n_features, n_categories, onehot=False, sparsity=0.8, cat_ratio=0.5
     )
     n_cat_features = len([0 for dtype in X.dtypes if is_pd_cat_dtype(dtype)])
     n_num_features = n_features - n_cat_features
@@ -340,12 +342,12 @@ USE_PART = 1
 
 
 def check_categorical_ohe(  # pylint: disable=too-many-arguments
-    rows: int, cols: int, rounds: int, cats: int, device: str, tree_method: str
+    *, rows: int, cols: int, rounds: int, cats: int, device: str, tree_method: str
 ) -> None:
     "Test for one-hot encoding with categorical data."
 
-    onehot, label = tm.make_categorical(rows, cols, cats, True)
-    cat, _ = tm.make_categorical(rows, cols, cats, False)
+    onehot, label = tm.make_categorical(rows, cols, cats, onehot=True)
+    cat, _ = tm.make_categorical(rows, cols, cats, onehot=False)
 
     by_etl_results: Dict[str, Dict[str, List[float]]] = {}
     by_builtin_results: Dict[str, Dict[str, List[float]]] = {}
@@ -6,7 +6,7 @@ import socket
 from enum import IntEnum, unique
 from typing import Dict, Optional, Union
 
-from .core import _LIB, _check_call, make_jcargs
+from .core import _LIB, _check_call, _deprecate_positional_args, make_jcargs
 
 
 def get_family(addr: str) -> int:
@@ -48,11 +48,13 @@ class RabitTracker:
         HOST = 0
         TASK = 1
 
+    @_deprecate_positional_args
     def __init__(  # pylint: disable=too-many-arguments
         self,
         n_workers: int,
         host_ip: Optional[str],
         port: int = 0,
+        *,
         sortby: str = "host",
         timeout: int = 0,
     ) -> None:
@@ -288,6 +288,7 @@ def groups_to_rows(groups: np.ndarray, boundaries: np.ndarray) -> np.ndarray:
 
 
 def mkgroupfold(
+    *,
     dall: DMatrix,
     nfold: int,
     param: BoosterParam,
@@ -341,6 +342,7 @@ def mkgroupfold(
 
 
 def mknfold(
+    *,
     dall: DMatrix,
     nfold: int,
     param: BoosterParam,
@@ -361,7 +363,12 @@ def mknfold(
     # Do standard k-fold cross validation. Automatically determine the folds.
     if len(dall.get_uint_info("group_ptr")) > 1:
         return mkgroupfold(
-            dall, nfold, param, evals=evals, fpreproc=fpreproc, shuffle=shuffle
+            dall=dall,
+            nfold=nfold,
+            param=param,
+            evals=evals,
+            fpreproc=fpreproc,
+            shuffle=shuffle,
        )
 
     if shuffle is True:
@@ -407,10 +414,12 @@ def mknfold(
     return ret
 
 
+@_deprecate_positional_args
 def cv(
     params: BoosterParam,
     dtrain: DMatrix,
     num_boost_round: int = 10,
+    *,
     nfold: int = 3,
     stratified: bool = False,
     folds: XGBStratifiedKFold = None,
@@ -541,7 +550,15 @@ def cv(
 
     results: Dict[str, List[float]] = {}
     cvfolds = mknfold(
-        dtrain, nfold, params, seed, metrics, fpreproc, stratified, folds, shuffle
+        dall=dtrain,
+        nfold=nfold,
+        param=params,
+        seed=seed,
+        evals=metrics,
+        fpreproc=fpreproc,
+        stratified=stratified,
+        folds=folds,
+        shuffle=shuffle,
     )
 
     metric_fn = _configure_custom_metric(feval, custom_metric)
@@ -32,6 +32,7 @@ class LintersPaths:
         "tests/python/test_tree_regularization.py",
         "tests/python/test_training_continuation.py",
         "tests/python/test_shap.py",
+        "tests/python/test_updaters.py",
         "tests/python/test_model_io.py",
         "tests/python/test_with_pandas.py",
         "tests/python-gpu/",
@@ -195,7 +195,7 @@ class TestFromColumnar:
     @pytest.mark.skipif(**tm.no_cudf())
     def test_cudf_categorical(self) -> None:
         n_features = 30
-        _X, _y = tm.make_categorical(100, n_features, 17, False)
+        _X, _y = tm.make_categorical(100, n_features, 17, onehot=False)
         X = cudf.from_pandas(_X)
         y = cudf.from_pandas(_y)
 
@@ -312,7 +312,7 @@ class IterForDMatrixTest(xgb.core.DataIter):
             self._data = []
             self._labels = []
             for i in range(self.BATCHES):
-                X, y = tm.make_categorical(self.ROWS_PER_BATCH, 4, 13, False)
+                X, y = tm.make_categorical(self.ROWS_PER_BATCH, 4, 13, onehot=False)
                 self._data.append(cudf.from_pandas(X))
                 self._labels.append(y)
         else:
@@ -405,7 +405,7 @@ class TestGPUPredict:
         )
 
     def test_shap_categorical(self):
-        X, y = tm.make_categorical(100, 20, 7, False)
+        X, y = tm.make_categorical(100, 20, 7, onehot=False)
         Xy = xgb.DMatrix(X, y, enable_categorical=True)
         booster = xgb.train(
             {"tree_method": "hist", "device": "gpu:0"}, Xy, num_boost_round=10
@@ -140,7 +140,14 @@ class TestGPUUpdaters:
     @settings(deadline=None, max_examples=20, print_blob=True)
     @pytest.mark.skipif(**tm.no_pandas())
     def test_categorical_ohe(self, rows, cols, rounds, cats):
-        check_categorical_ohe(rows, cols, rounds, cats, "cuda", "hist")
+        check_categorical_ohe(
+            rows=rows,
+            cols=cols,
+            rounds=rounds,
+            cats=cats,
+            device="cuda",
+            tree_method="hist",
+        )
 
     @given(
         tm.categorical_dataset_strategy,
@@ -222,10 +229,9 @@ class TestGPUUpdaters:
     def test_categorical_32_cat(self):
         """32 hits the bound of integer bitset, so special test"""
         rows = 1000
-        cols = 10
-        cats = 32
-        rounds = 4
-        check_categorical_ohe(rows, cols, rounds, cats, "cuda", "hist")
+        check_categorical_ohe(
+            rows=rows, cols=10, rounds=4, cats=32, device="cuda", tree_method="hist"
+        )
 
     @pytest.mark.skipif(**tm.no_cupy())
     def test_invalid_category(self):
@@ -104,7 +104,7 @@ class TestBoosterIO:
         self.run_model_json_io(parameters, ext)
 
     def test_categorical_model_io(self) -> None:
-        X, y = tm.make_categorical(256, 16, 71, False)
+        X, y = tm.make_categorical(256, 16, 71, onehot=False)
         Xy = xgb.DMatrix(X, y, enable_categorical=True)
         booster = xgb.train({"tree_method": "approx"}, Xy, num_boost_round=16)
         predt_0 = booster.predict(Xy)
@@ -49,7 +49,7 @@ class TestTreesToDataFrame:
         assert np.allclose(cover_from_dump, cover_from_df)
 
     def run_tree_to_df_categorical(self, tree_method: str) -> None:
-        X, y = tm.make_categorical(100, 10, 31, False)
+        X, y = tm.make_categorical(100, 10, 31, onehot=False)
         Xy = xgb.DMatrix(X, y, enable_categorical=True)
         booster = xgb.train({"tree_method": tree_method}, Xy, num_boost_round=10)
         df = booster.trees_to_dataframe()
@@ -61,7 +61,7 @@ class TestTreesToDataFrame:
         self.run_tree_to_df_categorical("approx")
 
     def run_split_value_histograms(self, tree_method) -> None:
-        X, y = tm.make_categorical(1000, 10, 13, False)
+        X, y = tm.make_categorical(1000, 10, 13, onehot=False)
         reg = xgb.XGBRegressor(tree_method=tree_method, enable_categorical=True)
         reg.fit(X, y)
 
@@ -97,14 +97,15 @@ class TestQuantileDMatrix:
 
         if sparsity == 0.0:
             it = IteratorForTest(
-                *make_batches(n_samples_per_batch, n_features, n_batches, False), None
+                *make_batches(n_samples_per_batch, n_features, n_batches, False),
+                cache=None,
             )
         else:
             it = IteratorForTest(
                 *make_batches_sparse(
                     n_samples_per_batch, n_features, n_batches, sparsity
                 ),
-                None,
+                cache=None,
             )
         Xy = xgb.QuantileDMatrix(it)
         assert Xy.num_row() == n_samples_per_batch * n_batches
@@ -134,14 +135,15 @@ class TestQuantileDMatrix:
         n_batches = 7
         if sparsity == 0.0:
             it = IteratorForTest(
-                *make_batches(n_samples_per_batch, n_features, n_batches, False), None
+                *make_batches(n_samples_per_batch, n_features, n_batches, False),
+                cache=None,
            )
         else:
             it = IteratorForTest(
                 *make_batches_sparse(
                     n_samples_per_batch, n_features, n_batches, sparsity
                 ),
-                None,
+                cache=None,
             )
 
         parameters = {"tree_method": "hist", "max_bin": 256}
@ -81,23 +81,26 @@ class TestTreeMethod:
|
|||||||
@pytest.mark.skipif(**tm.no_sklearn())
|
@pytest.mark.skipif(**tm.no_sklearn())
|
||||||
def test_pruner(self):
|
def test_pruner(self):
|
||||||
import sklearn
|
import sklearn
|
||||||
params = {'tree_method': 'exact'}
|
|
||||||
|
params = {"tree_method": "exact"}
|
||||||
cancer = sklearn.datasets.load_breast_cancer()
|
cancer = sklearn.datasets.load_breast_cancer()
|
||||||
X = cancer['data']
|
X = cancer["data"]
|
||||||
y = cancer["target"]
|
y = cancer["target"]
|
||||||
|
|
||||||
dtrain = xgb.DMatrix(X, y)
|
dtrain = xgb.DMatrix(X, y)
|
||||||
booster = xgb.train(params, dtrain=dtrain, num_boost_round=10)
|
booster = xgb.train(params, dtrain=dtrain, num_boost_round=10)
|
||||||
grown = str(booster.get_dump())
|
grown = str(booster.get_dump())
|
||||||
|
|
||||||
params = {'updater': 'prune', 'process_type': 'update', 'gamma': '0.2'}
|
params = {"updater": "prune", "process_type": "update", "gamma": "0.2"}
|
||||||
booster = xgb.train(params, dtrain=dtrain, num_boost_round=10,
|
booster = xgb.train(
|
||||||
xgb_model=booster)
|
params, dtrain=dtrain, num_boost_round=10, xgb_model=booster
|
||||||
|
)
|
||||||
after_prune = str(booster.get_dump())
|
after_prune = str(booster.get_dump())
|
||||||
assert grown != after_prune
|
assert grown != after_prune
|
||||||
|
|
||||||
booster = xgb.train(params, dtrain=dtrain, num_boost_round=10,
|
booster = xgb.train(
|
||||||
xgb_model=booster)
|
params, dtrain=dtrain, num_boost_round=10, xgb_model=booster
|
||||||
|
)
|
||||||
second_prune = str(booster.get_dump())
|
second_prune = str(booster.get_dump())
|
||||||
# Second prune should not change the tree
|
# Second prune should not change the tree
|
||||||
assert after_prune == second_prune
|
assert after_prune == second_prune
|
||||||
@ -107,11 +110,12 @@ class TestTreeMethod:
|
|||||||
hist_parameter_strategy,
|
hist_parameter_strategy,
|
||||||
hist_cache_strategy,
|
hist_cache_strategy,
|
||||||
strategies.integers(1, 20),
|
strategies.integers(1, 20),
|
||||||
tm.make_dataset_strategy()
|
tm.make_dataset_strategy(),
|
||||||
)
|
)
|
||||||
@settings(deadline=None, print_blob=True)
|
@settings(deadline=None, print_blob=True)
|
||||||
def test_hist(
|
def test_hist(
|
||||||
self, param: Dict[str, Any],
|
self,
|
||||||
|
param: Dict[str, Any],
|
||||||
hist_param: Dict[str, Any],
|
hist_param: Dict[str, Any],
|
||||||
cache_param: Dict[str, Any],
|
cache_param: Dict[str, Any],
|
||||||
num_rounds: int,
|
num_rounds: int,
|
||||||
@ -128,11 +132,13 @@ class TestTreeMethod:
|
|||||||
def test_hist_categorical(self):
|
def test_hist_categorical(self):
|
||||||
# hist must be same as exact on all-categorial data
|
# hist must be same as exact on all-categorial data
|
||||||
ag_dtrain, ag_dtest = tm.load_agaricus(__file__)
|
ag_dtrain, ag_dtest = tm.load_agaricus(__file__)
|
||||||
ag_param = {'max_depth': 2,
|
ag_param = {
|
||||||
'tree_method': 'hist',
|
"max_depth": 2,
|
||||||
'eta': 1,
|
"tree_method": "hist",
|
||||||
'objective': 'binary:logistic',
|
"eta": 1,
|
||||||
'eval_metric': 'auc'}
|
"objective": "binary:logistic",
|
||||||
|
"eval_metric": "auc",
|
||||||
|
}
|
||||||
hist_res = {}
|
hist_res = {}
|
||||||
exact_res = {}
|
exact_res = {}
|
||||||
|
|
||||||
@ -141,7 +147,7 @@ class TestTreeMethod:
|
|||||||
ag_dtrain,
|
ag_dtrain,
|
||||||
10,
|
10,
|
||||||
evals=[(ag_dtrain, "train"), (ag_dtest, "test")],
|
evals=[(ag_dtrain, "train"), (ag_dtest, "test")],
|
||||||
evals_result=hist_res
|
evals_result=hist_res,
|
||||||
)
|
)
|
||||||
ag_param["tree_method"] = "exact"
|
ag_param["tree_method"] = "exact"
|
||||||
xgb.train(
|
xgb.train(
|
||||||
@ -149,10 +155,10 @@ class TestTreeMethod:
|
|||||||
ag_dtrain,
|
ag_dtrain,
|
||||||
10,
|
10,
|
||||||
evals=[(ag_dtrain, "train"), (ag_dtest, "test")],
|
evals=[(ag_dtrain, "train"), (ag_dtest, "test")],
|
||||||
evals_result=exact_res
|
evals_result=exact_res,
|
||||||
)
|
)
|
||||||
assert hist_res['train']['auc'] == exact_res['train']['auc']
|
assert hist_res["train"]["auc"] == exact_res["train"]["auc"]
|
||||||
assert hist_res['test']['auc'] == exact_res['test']['auc']
|
assert hist_res["test"]["auc"] == exact_res["test"]["auc"]
|
||||||
|
|
||||||
@pytest.mark.skipif(**tm.no_sklearn())
|
@pytest.mark.skipif(**tm.no_sklearn())
|
||||||
def test_hist_degenerate_case(self):
|
def test_hist_degenerate_case(self):
|
||||||
@ -160,11 +166,17 @@ class TestTreeMethod:
         # quantile points for a particular feature (the second feature in
         # this example). Source: https://github.com/dmlc/xgboost/issues/2943
         nan = np.nan
-        param = {'missing': nan, 'tree_method': 'hist'}
+        param = {"missing": nan, "tree_method": "hist"}
         model = xgb.XGBRegressor(**param)
-        X = np.array([[6.18827160e+05, 1.73000000e+02], [6.37345679e+05, nan],
-                      [6.38888889e+05, nan], [6.28086420e+05, nan]])
-        y = [1000000., 0., 0., 500000.]
+        X = np.array(
+            [
+                [6.18827160e05, 1.73000000e02],
+                [6.37345679e05, nan],
+                [6.38888889e05, nan],
+                [6.28086420e05, nan],
+            ]
+        )
+        y = [1000000.0, 0.0, 0.0, 500000.0]
         w = [0, 0, 1, 0]
         model.fit(X, y, sample_weight=w)

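The rebuilt array literal above is the whole regression input; as a self-contained repro of the underlying issue (dmlc/xgboost#2943), the scenario is roughly:

import numpy as np
import xgboost as xgb

nan = np.nan
# Only the third row carries weight, and its second feature is missing,
# so the 'hist' sketcher gets no quantile points for that feature.
X = np.array(
    [
        [6.18827160e05, 1.73000000e02],
        [6.37345679e05, nan],
        [6.38888889e05, nan],
        [6.28086420e05, nan],
    ]
)
y = [1000000.0, 0.0, 0.0, 500000.0]
w = [0, 0, 1, 0]

model = xgb.XGBRegressor(missing=nan, tree_method="hist")
model.fit(X, y, sample_weight=w)  # must complete without crashing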
@ -174,12 +186,12 @@ class TestTreeMethod:
         param = {"tree_method": "hist", "max_bin": 64}
         hist_result = train_result(param, dataset.get_dmat(), 16)
         note(str(hist_result))
-        assert tm.non_increasing(hist_result['train'][dataset.metric])
+        assert tm.non_increasing(hist_result["train"][dataset.metric])

         param = {"tree_method": "approx", "max_bin": 64}
         approx_result = train_result(param, dataset.get_dmat(), 16)
         note(str(approx_result))
-        assert tm.non_increasing(approx_result['train'][dataset.metric])
+        assert tm.non_increasing(approx_result["train"][dataset.metric])

         np.testing.assert_allclose(
             hist_result["train"]["rmse"], approx_result["train"]["rmse"]
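`tm.non_increasing` asserts that the training metric never rises between rounds; an equivalent helper, sketched here rather than copied from the testing module:

from typing import Sequence

def non_increasing(history: Sequence[float], tolerance: float = 1e-4) -> bool:
    # Each round's metric may exceed the previous one by at most `tolerance`.
    return all(b <= a + tolerance for a, b in zip(history, history[1:]))

assert non_increasing([0.9, 0.5, 0.5, 0.2])
assert not non_increasing([0.9, 0.5, 0.6])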
@ -248,15 +260,33 @@ class TestTreeMethod:
     def test_max_cat(self, tree_method) -> None:
         self.run_max_cat(tree_method)

-    @given(strategies.integers(10, 400), strategies.integers(3, 8),
-           strategies.integers(1, 2), strategies.integers(4, 7))
+    @given(
+        strategies.integers(10, 400),
+        strategies.integers(3, 8),
+        strategies.integers(1, 2),
+        strategies.integers(4, 7),
+    )
     @settings(deadline=None, print_blob=True)
     @pytest.mark.skipif(**tm.no_pandas())
     def test_categorical_ohe(
         self, rows: int, cols: int, rounds: int, cats: int
     ) -> None:
-        check_categorical_ohe(rows, cols, rounds, cats, "cpu", "approx")
-        check_categorical_ohe(rows, cols, rounds, cats, "cpu", "hist")
+        check_categorical_ohe(
+            rows=rows,
+            cols=cols,
+            rounds=rounds,
+            cats=cats,
+            device="cpu",
+            tree_method="approx",
+        )
+        check_categorical_ohe(
+            rows=rows,
+            cols=cols,
+            rounds=rounds,
+            cats=cats,
+            device="cpu",
+            tree_method="hist",
+        )

     @given(
         tm.categorical_dataset_strategy,
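The rewritten `check_categorical_ohe` calls name every argument, which is what lets a keyword-only signature catch transposed arguments at the call site. A minimal sketch of that pattern, with a hypothetical helper standing in for the real testing utility:

def check_categorical_ohe(
    *,  # a bare star makes every following parameter keyword-only
    rows: int,
    cols: int,
    rounds: int,
    cats: int,
    device: str,
    tree_method: str,
) -> None:
    print(rows, cols, rounds, cats, device, tree_method)

# check_categorical_ohe(10, 3, 1, 4, "cpu", "hist") now raises TypeError;
# only the explicit keyword form is accepted:
check_categorical_ohe(rows=10, cols=3, rounds=1, cats=4, device="cpu", tree_method="hist")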
@ -307,7 +337,7 @@ class TestTreeMethod:
     @given(
         strategies.integers(10, 400),
         strategies.integers(3, 8),
-        strategies.integers(4, 7)
+        strategies.integers(4, 7),
     )
     @settings(deadline=None, print_blob=True)
     @pytest.mark.skipif(**tm.no_pandas())
@ -395,9 +425,8 @@ class TestTreeMethod:

     @pytest.mark.skipif(**tm.no_sklearn())
     @pytest.mark.parametrize(
-        "tree_method,weighted", [
-            ("approx", False), ("hist", False), ("approx", True), ("hist", True)
-        ]
+        "tree_method,weighted",
+        [("approx", False), ("hist", False), ("approx", True), ("hist", True)],
     )
     def test_adaptive(self, tree_method, weighted) -> None:
         self.run_adaptive(tree_method, weighted)
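The flattened list spells out all four (tree_method, weighted) pairs; stacking two `parametrize` decorators would generate the same grid, shown here only as a sketch of the equivalence:

import pytest

@pytest.mark.parametrize("weighted", [False, True])
@pytest.mark.parametrize("tree_method", ["approx", "hist"])
def test_adaptive_grid(tree_method: str, weighted: bool) -> None:
    # pytest expands the stacked decorators into the cross product:
    # ("approx", False), ("hist", False), ("approx", True), ("hist", True).
    assert tree_method in ("approx", "hist")
    assert isinstance(weighted, bool)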
@ -1161,14 +1161,24 @@ def test_feature_weights(tree_method):

     parser_path = os.path.join(tm.demo_dir(__file__), "json-model", "json_parser.py")
     poly_increasing = get_feature_weights(
-        X, y, fw, parser_path, tree_method, xgb.XGBRegressor
+        X=X,
+        y=y,
+        fw=fw,
+        parser_path=parser_path,
+        tree_method=tree_method,
+        model=xgb.XGBRegressor,
     )

     fw = np.ones(shape=(kCols,))
     for i in range(kCols):
         fw[i] *= float(kCols - i)
     poly_decreasing = get_feature_weights(
-        X, y, fw, parser_path, tree_method, xgb.XGBRegressor
+        X=X,
+        y=y,
+        fw=fw,
+        parser_path=parser_path,
+        tree_method=tree_method,
+        model=xgb.XGBRegressor,
     )

     # Approxmated test, this is dependent on the implementation of random
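For reference, the decreasing `fw` built in the loop above biases column sampling toward low-index features; the mechanism it exercises can be sketched directly with `DMatrix.set_info` (synthetic data, not the test's fixtures):

import numpy as np
import xgboost as xgb

kRows, kCols = 512, 8
rng = np.random.default_rng(1994)
X = rng.standard_normal((kRows, kCols))
y = X.sum(axis=1)

# Feature i gets weight kCols - i, so earlier columns are sampled
# more often whenever colsample_bynode < 1.
fw = np.array([float(kCols - i) for i in range(kCols)])

dtrain = xgb.DMatrix(X, y)
dtrain.set_info(feature_weights=fw)
xgb.train(
    {"tree_method": "approx", "colsample_bynode": 0.5},
    dtrain,
    num_boost_round=10,
)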
@ -359,7 +359,7 @@ def run_categorical(

 def test_categorical(client: "Client") -> None:
     X, y = make_categorical(client, 10000, 30, 13)
-    X_onehot, _ = make_categorical(client, 10000, 30, 13, True)
+    X_onehot, _ = make_categorical(client, 10000, 30, 13, onehot=True)
     run_categorical(client, "approx", "cpu", X, X_onehot, y)
     run_categorical(client, "hist", "cpu", X, X_onehot, y)

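Naming the flag (`onehot=True`) instead of passing a bare `True` is the fix for PyLint's boolean-positional warning; the hazard it guards against, shown with a hypothetical helper:

def make_encoding(n_samples: int, n_categories: int, onehot: bool = False) -> str:
    # A bare positional True/False at the call site says nothing about
    # which of several flags it toggles; the keyword form does.
    return "one-hot encoded" if onehot else "label encoded"

assert make_encoding(100, 4, onehot=True) == "one-hot encoded"
assert make_encoding(100, 4) == "label encoded"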
@ -1335,7 +1335,7 @@ class TestWithDask:
         def save_dmatrix(rabit_args: Dict[str, Union[int, str]], tmpdir: str) -> None:
             with xgb.dask.CommunicatorContext(**rabit_args):
                 rank = xgb.collective.get_rank()
-                X, y = tm.make_categorical(100, 4, 4, False)
+                X, y = tm.make_categorical(100, 4, 4, onehot=False)
                 Xy = xgb.DMatrix(X, y, enable_categorical=True)
                 path = os.path.join(tmpdir, f"{rank}.bin")
                 Xy.save_binary(path)
@ -1665,7 +1665,12 @@ class TestWithDask:
         fw = da.from_array(fw)
         parser = os.path.join(tm.demo_dir(__file__), "json-model", "json_parser.py")
         poly_increasing = get_feature_weights(
-            X, y, fw, parser, "approx", model=xgb.dask.DaskXGBRegressor
+            X=X,
+            y=y,
+            fw=fw,
+            parser_path=parser,
+            tree_method="approx",
+            model=xgb.dask.DaskXGBRegressor,
         )

         fw = np.ones(shape=(kCols,))
@ -1673,7 +1678,12 @@ class TestWithDask:
             fw[i] *= float(kCols - i)
         fw = da.from_array(fw)
         poly_decreasing = get_feature_weights(
-            X, y, fw, parser, "approx", model=xgb.dask.DaskXGBRegressor
+            X=X,
+            y=y,
+            fw=fw,
+            parser_path=parser,
+            tree_method="approx",
+            model=xgb.dask.DaskXGBRegressor,
         )

         # Approxmated test, this is dependent on the implementation of random
@ -67,8 +67,8 @@ def run_dmatrix_ctor(is_feature_cols: bool, is_qdm: bool, on_gpu: bool) -> None:
     cols = [f"feat-{i}" for i in range(n_features)]
     feature_cols = cols if is_feature_cols else None
     train_Xy, valid_Xy = create_dmatrix_from_partitions(
-        iter(dfs),
-        feature_cols,
+        iterator=iter(dfs),
+        feature_cols=feature_cols,
         dev_ordinal=device_id,
         use_qdm=is_qdm,
         kwargs=kwargs,
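`create_dmatrix_from_partitions` turns an iterator of partitions into a (Quantile)DMatrix; the underlying pattern is xgboost's public `DataIter`, sketched below with in-memory numpy partitions (the partition data is made up; the `DataIter` interface is real):

from typing import Callable, List, Tuple

import numpy as np
import xgboost as xgb


class PartitionIter(xgb.DataIter):
    """Feed pre-split (X, y) partitions to XGBoost one batch at a time."""

    def __init__(self, partitions: List[Tuple[np.ndarray, np.ndarray]]) -> None:
        self._parts = partitions
        self._it = 0
        super().__init__()

    def next(self, input_data: Callable) -> bool:
        if self._it == len(self._parts):
            return False  # no more partitions
        X, y = self._parts[self._it]
        input_data(data=X, label=y)
        self._it += 1
        return True

    def reset(self) -> None:
        self._it = 0


rng = np.random.default_rng(3)
parts = [(rng.random((64, 4)), rng.random(64)) for _ in range(4)]
Xy = xgb.QuantileDMatrix(PartitionIter(parts))
assert Xy.num_row() == 256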