[CI] Fix PyLint errors. (#10837)

Jiaming Yuan 2024-09-24 14:09:32 +08:00 committed by GitHub
parent 982ee34658
commit 68a8865bc5
26 changed files with 349 additions and 118 deletions
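
The same pattern recurs throughout the diff: parameters that PyLint flagged as excessive positional arguments become keyword-only via a bare * in the signature, and the public entry points are additionally wrapped in @_deprecate_positional_args so existing positional callers get a deprecation warning instead of an immediate break. The decorator itself is imported from .core and its body is not part of this diff; the sketch below is a hypothetical stand-in, modelled on the scikit-learn helper it is commonly adapted from, and only illustrates the mechanism rather than the exact xgboost implementation.

import warnings
from functools import wraps
from inspect import Parameter, signature
from typing import Any, Callable, TypeVar

_F = TypeVar("_F", bound=Callable[..., Any])


def _deprecate_positional_args_sketch(func: _F) -> _F:
    """Warn when parameters declared keyword-only are still passed positionally."""
    sig = signature(func)
    # Parameters that may legitimately be passed positionally (e.g. ``self``, ``data``).
    positional = [
        name
        for name, p in sig.parameters.items()
        if p.kind in (Parameter.POSITIONAL_ONLY, Parameter.POSITIONAL_OR_KEYWORD)
    ]
    keyword_only = [
        name for name, p in sig.parameters.items() if p.kind == Parameter.KEYWORD_ONLY
    ]

    @wraps(func)
    def wrapper(*args: Any, **kwargs: Any) -> Any:
        n_extra = len(args) - len(positional)
        if n_extra > 0:
            extra_names = keyword_only[:n_extra]
            warnings.warn(
                f"Pass {', '.join(extra_names)} as keyword arguments; passing them "
                "positionally is deprecated.",
                FutureWarning,
            )
            # Re-bind the overflowing positional values to their keyword-only names.
            kwargs.update(zip(extra_names, args[len(positional):]))
            args = args[: len(positional)]
        return func(*args, **kwargs)

    return wrapper  # type: ignore[return-value]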

View File

@ -23,7 +23,13 @@ from typing import (
import numpy import numpy
from . import collective from . import collective
from .core import Booster, DMatrix, XGBoostError, _parse_eval_str from .core import (
Booster,
DMatrix,
XGBoostError,
_deprecate_positional_args,
_parse_eval_str,
)
__all__ = [ __all__ = [
"TrainingCallback", "TrainingCallback",
@ -346,8 +352,10 @@ class EarlyStopping(TrainingCallback):
""" """
# pylint: disable=too-many-arguments # pylint: disable=too-many-arguments
@_deprecate_positional_args
def __init__( def __init__(
self, self,
*,
rounds: int, rounds: int,
metric_name: Optional[str] = None, metric_name: Optional[str] = None,
data_name: Optional[str] = None, data_name: Optional[str] = None,
@ -375,7 +383,7 @@ class EarlyStopping(TrainingCallback):
return model return model
def _update_rounds( def _update_rounds(
self, score: _Score, name: str, metric: str, model: _Model, epoch: int self, *, score: _Score, name: str, metric: str, model: _Model, epoch: int
) -> bool: ) -> bool:
def get_s(value: _Score) -> float: def get_s(value: _Score) -> float:
"""get score if it's cross validation history.""" """get score if it's cross validation history."""
@ -471,7 +479,9 @@ class EarlyStopping(TrainingCallback):
# The latest score # The latest score
score = data_log[metric_name][-1] score = data_log[metric_name][-1]
return self._update_rounds(score, data_name, metric_name, model, epoch) return self._update_rounds(
score=score, name=data_name, metric=metric_name, model=model, epoch=epoch
)
def after_training(self, model: _Model) -> _Model: def after_training(self, model: _Model) -> _Model:
if not self.save_best: if not self.save_best:
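
As a hedged caller-facing sketch of the callback change above: EarlyStopping options now have to be passed by keyword, and positional use is expected to go through the deprecation path rather than fail outright.

import xgboost as xgb

# Preferred spelling after this commit: every option is a keyword argument.
early_stop = xgb.callback.EarlyStopping(rounds=10, metric_name="rmse", save_best=True)

# Legacy spelling: the decorator should emit a FutureWarning and then forward
# 10 as rounds=10, so old scripts keep running for now.
early_stop_legacy = xgb.callback.EarlyStopping(10)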

View File

@ -907,7 +907,7 @@ class DMatrix: # pylint: disable=too-many-instance-attributes,too-many-public-m
return return
handle, feature_names, feature_types = dispatch_data_backend( handle, feature_names, feature_types = dispatch_data_backend(
data, data=data,
missing=self.missing, missing=self.missing,
threads=self.nthread, threads=self.nthread,
feature_names=feature_names, feature_names=feature_names,
@ -1697,6 +1697,7 @@ class ExtMemQuantileDMatrix(DMatrix):
def __init__( # pylint: disable=super-init-not-called def __init__( # pylint: disable=super-init-not-called
self, self,
data: DataIter, data: DataIter,
*,
missing: Optional[float] = None, missing: Optional[float] = None,
nthread: Optional[int] = None, nthread: Optional[int] = None,
max_bin: Optional[int] = None, max_bin: Optional[int] = None,
@ -2355,9 +2356,11 @@ class Booster:
return self.eval_set([(data, name)], iteration) return self.eval_set([(data, name)], iteration)
# pylint: disable=too-many-function-args # pylint: disable=too-many-function-args
@_deprecate_positional_args
def predict( def predict(
self, self,
data: DMatrix, data: DMatrix,
*,
output_margin: bool = False, output_margin: bool = False,
pred_leaf: bool = False, pred_leaf: bool = False,
pred_contribs: bool = False, pred_contribs: bool = False,
@ -2490,9 +2493,11 @@ class Booster:
return _prediction_output(shape, dims, preds, False) return _prediction_output(shape, dims, preds, False)
# pylint: disable=too-many-statements # pylint: disable=too-many-statements
@_deprecate_positional_args
def inplace_predict( def inplace_predict(
self, self,
data: DataType, data: DataType,
*,
iteration_range: IterationRange = (0, 0), iteration_range: IterationRange = (0, 0),
predict_type: str = "value", predict_type: str = "value",
missing: float = np.nan, missing: float = np.nan,
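
A short, hedged usage sketch for the Booster changes above: data stays positional in predict() and inplace_predict(), everything after it is keyword-only.

import numpy as np
import xgboost as xgb

rng = np.random.default_rng(0)
X, y = rng.random((128, 4)), rng.random(128)
dtrain = xgb.DMatrix(X, label=y)
booster = xgb.train({"objective": "reg:squarederror"}, dtrain, num_boost_round=5)

# Keyword form: unchanged, and now the only non-deprecated spelling.
margin = booster.predict(dtrain, output_margin=True)
raw = booster.inplace_predict(X, predict_type="margin")

# booster.predict(dtrain, True) would only bind True to output_margin after a
# FutureWarning from the new decorator.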

View File

@ -339,8 +339,8 @@ class DaskDMatrix:
self._init = client.sync( self._init = client.sync(
self._map_local_data, self._map_local_data,
client, client=client,
data, data=data,
label=label, label=label,
weights=weight, weights=weight,
base_margin=base_margin, base_margin=base_margin,
@ -355,6 +355,7 @@ class DaskDMatrix:
async def _map_local_data( async def _map_local_data(
self, self,
*,
client: "distributed.Client", client: "distributed.Client",
data: _DataT, data: _DataT,
label: Optional[_DaskCollection] = None, label: Optional[_DaskCollection] = None,
@ -589,6 +590,7 @@ class DaskPartitionIter(DataIter): # pylint: disable=R0902
self, self,
data: List[Any], data: List[Any],
label: Optional[List[Any]] = None, label: Optional[List[Any]] = None,
*,
weight: Optional[List[Any]] = None, weight: Optional[List[Any]] = None,
base_margin: Optional[List[Any]] = None, base_margin: Optional[List[Any]] = None,
qid: Optional[List[Any]] = None, qid: Optional[List[Any]] = None,
@ -712,6 +714,7 @@ class DaskQuantileDMatrix(DaskDMatrix):
def _create_quantile_dmatrix( def _create_quantile_dmatrix(
*,
feature_names: Optional[FeatureNames], feature_names: Optional[FeatureNames],
feature_types: Optional[Union[Any, List[Any]]], feature_types: Optional[Union[Any, List[Any]]],
feature_weights: Optional[Any], feature_weights: Optional[Any],
@ -757,6 +760,7 @@ def _create_quantile_dmatrix(
def _create_dmatrix( def _create_dmatrix(
*,
feature_names: Optional[FeatureNames], feature_names: Optional[FeatureNames],
feature_types: Optional[Union[Any, List[Any]]], feature_types: Optional[Union[Any, List[Any]]],
feature_weights: Optional[Any], feature_weights: Optional[Any],
@ -927,6 +931,7 @@ def _get_dmatrices(
async def _train_async( async def _train_async(
*,
client: "distributed.Client", client: "distributed.Client",
global_config: Dict[str, Any], global_config: Dict[str, Any],
dconfig: Optional[Dict[str, Any]], dconfig: Optional[Dict[str, Any]],
@ -947,7 +952,7 @@ async def _train_async(
_rabit_args = await _get_rabit_args(len(workers), dconfig, client) _rabit_args = await _get_rabit_args(len(workers), dconfig, client)
_check_distributed_params(params) _check_distributed_params(params)
def dispatched_train( def dispatched_train( # pylint: disable=too-many-positional-arguments
parameters: Dict, parameters: Dict,
rabit_args: Dict[str, Union[str, int]], rabit_args: Dict[str, Union[str, int]],
train_id: int, train_id: int,
@ -1115,6 +1120,7 @@ def _maybe_dataframe(
async def _direct_predict_impl( # pylint: disable=too-many-branches async def _direct_predict_impl( # pylint: disable=too-many-branches
*,
mapped_predict: Callable, mapped_predict: Callable,
booster: "distributed.Future", booster: "distributed.Future",
data: _DataT, data: _DataT,
@ -1249,6 +1255,7 @@ async def _predict_async(
global_config: Dict[str, Any], global_config: Dict[str, Any],
model: Union[Booster, Dict, "distributed.Future"], model: Union[Booster, Dict, "distributed.Future"],
data: _DataT, data: _DataT,
*,
output_margin: bool, output_margin: bool,
missing: float, missing: float,
pred_leaf: bool, pred_leaf: bool,
@ -1304,7 +1311,12 @@ async def _predict_async(
) )
) )
return await _direct_predict_impl( return await _direct_predict_impl(
mapped_predict, _booster, data, None, _output_shape, meta mapped_predict=mapped_predict,
booster=_booster,
data=data,
base_margin=None,
output_shape=_output_shape,
meta=meta,
) )
output_shape, _ = await client.compute( output_shape, _ = await client.compute(
@ -1392,10 +1404,12 @@ async def _predict_async(
return predictions return predictions
@_deprecate_positional_args
def predict( # pylint: disable=unused-argument def predict( # pylint: disable=unused-argument
client: Optional["distributed.Client"], client: Optional["distributed.Client"],
model: Union[TrainReturnT, Booster, "distributed.Future"], model: Union[TrainReturnT, Booster, "distributed.Future"],
data: Union[DaskDMatrix, _DataT], data: Union[DaskDMatrix, _DataT],
*,
output_margin: bool = False, output_margin: bool = False,
missing: float = numpy.nan, missing: float = numpy.nan,
pred_leaf: bool = False, pred_leaf: bool = False,
@ -1447,6 +1461,7 @@ def predict( # pylint: disable=unused-argument
async def _inplace_predict_async( # pylint: disable=too-many-branches async def _inplace_predict_async( # pylint: disable=too-many-branches
*,
client: "distributed.Client", client: "distributed.Client",
global_config: Dict[str, Any], global_config: Dict[str, Any],
model: Union[Booster, Dict, "distributed.Future"], model: Union[Booster, Dict, "distributed.Future"],
@ -1501,14 +1516,21 @@ async def _inplace_predict_async( # pylint: disable=too-many-branches
) )
) )
return await _direct_predict_impl( return await _direct_predict_impl(
mapped_predict, booster, data, base_margin, shape, meta mapped_predict=mapped_predict,
booster=booster,
data=data,
base_margin=base_margin,
output_shape=shape,
meta=meta,
) )
@_deprecate_positional_args
def inplace_predict( # pylint: disable=unused-argument def inplace_predict( # pylint: disable=unused-argument
client: Optional["distributed.Client"], client: Optional["distributed.Client"],
model: Union[TrainReturnT, Booster, "distributed.Future"], model: Union[TrainReturnT, Booster, "distributed.Future"],
data: _DataT, data: _DataT,
*,
iteration_range: IterationRange = (0, 0), iteration_range: IterationRange = (0, 0),
predict_type: str = "value", predict_type: str = "value",
missing: float = numpy.nan, missing: float = numpy.nan,
@ -1615,6 +1637,7 @@ class DaskScikitLearnBase(XGBModel):
async def _predict_async( async def _predict_async(
self, self,
data: _DataT, data: _DataT,
*,
output_margin: bool, output_margin: bool,
validate_features: bool, validate_features: bool,
base_margin: Optional[_DaskCollection], base_margin: Optional[_DaskCollection],
@ -1652,9 +1675,11 @@ class DaskScikitLearnBase(XGBModel):
) )
return predts return predts
@_deprecate_positional_args
def predict( def predict(
self, self,
X: _DataT, X: _DataT,
*,
output_margin: bool = False, output_margin: bool = False,
validate_features: bool = True, validate_features: bool = True,
base_margin: Optional[_DaskCollection] = None, base_margin: Optional[_DaskCollection] = None,
@ -1765,6 +1790,7 @@ class DaskXGBRegressor(DaskScikitLearnBase, XGBRegressorBase):
self, self,
X: _DataT, X: _DataT,
y: _DaskCollection, y: _DaskCollection,
*,
sample_weight: Optional[_DaskCollection], sample_weight: Optional[_DaskCollection],
base_margin: Optional[_DaskCollection], base_margin: Optional[_DaskCollection],
eval_set: Optional[Sequence[Tuple[_DaskCollection, _DaskCollection]]], eval_set: Optional[Sequence[Tuple[_DaskCollection, _DaskCollection]]],
@ -1855,6 +1881,7 @@ class DaskXGBClassifier(DaskScikitLearnBase, XGBClassifierBase):
self, self,
X: _DataT, X: _DataT,
y: _DaskCollection, y: _DaskCollection,
*,
sample_weight: Optional[_DaskCollection], sample_weight: Optional[_DaskCollection],
base_margin: Optional[_DaskCollection], base_margin: Optional[_DaskCollection],
eval_set: Optional[Sequence[Tuple[_DaskCollection, _DaskCollection]]], eval_set: Optional[Sequence[Tuple[_DaskCollection, _DaskCollection]]],
@ -1999,13 +2026,18 @@ class DaskXGBClassifier(DaskScikitLearnBase, XGBClassifierBase):
async def _predict_async( async def _predict_async(
self, self,
data: _DataT, data: _DataT,
*,
output_margin: bool, output_margin: bool,
validate_features: bool, validate_features: bool,
base_margin: Optional[_DaskCollection], base_margin: Optional[_DaskCollection],
iteration_range: Optional[IterationRange], iteration_range: Optional[IterationRange],
) -> _DaskCollection: ) -> _DaskCollection:
pred_probs = await super()._predict_async( pred_probs = await super()._predict_async(
data, output_margin, validate_features, base_margin, iteration_range data,
output_margin=output_margin,
validate_features=validate_features,
base_margin=base_margin,
iteration_range=iteration_range,
) )
if output_margin: if output_margin:
return pred_probs return pred_probs
@ -2049,6 +2081,7 @@ class DaskXGBRanker(DaskScikitLearnBase, XGBRankerMixIn):
self, self,
X: _DataT, X: _DataT,
y: _DaskCollection, y: _DaskCollection,
*,
group: Optional[_DaskCollection], group: Optional[_DaskCollection],
qid: Optional[_DaskCollection], qid: Optional[_DaskCollection],
sample_weight: Optional[_DaskCollection], sample_weight: Optional[_DaskCollection],
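
The dask module follows suit; below is a hedged sketch of how a caller now spells predict() options, assuming a throwaway LocalCluster purely for illustration.

from dask import array as da
from dask.distributed import Client, LocalCluster
import xgboost as xgb

with LocalCluster(n_workers=2, threads_per_worker=1) as cluster:
    with Client(cluster) as client:
        X = da.random.random((1000, 10), chunks=(250, 10))
        y = da.random.random(1000, chunks=250)
        dtrain = xgb.dask.DaskDMatrix(client, X, y)
        output = xgb.dask.train(
            client, {"objective": "reg:squarederror"}, dtrain, num_boost_round=5
        )
        # Options such as output_margin must now be spelled out as keywords.
        margin = xgb.dask.predict(client, output, dtrain, output_margin=True)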

View File

@ -128,6 +128,7 @@ def transform_scipy_sparse(data: DataType, is_csr: bool) -> DataType:
def _from_scipy_csr( def _from_scipy_csr(
*,
data: DataType, data: DataType,
missing: FloatCompatible, missing: FloatCompatible,
nthread: int, nthread: int,
@ -176,6 +177,7 @@ def is_scipy_csc(data: DataType) -> bool:
def _from_scipy_csc( def _from_scipy_csc(
*,
data: DataType, data: DataType,
missing: FloatCompatible, missing: FloatCompatible,
nthread: int, nthread: int,
@ -251,6 +253,7 @@ def _maybe_np_slice(data: DataType, dtype: Optional[NumpyDType]) -> np.ndarray:
def _from_numpy_array( def _from_numpy_array(
*,
data: np.ndarray, data: np.ndarray,
missing: FloatCompatible, missing: FloatCompatible,
nthread: int, nthread: int,
@ -639,6 +642,7 @@ def _meta_from_pandas_df(
def _from_pandas_df( def _from_pandas_df(
*,
data: DataFrame, data: DataFrame,
enable_categorical: bool, enable_categorical: bool,
missing: FloatCompatible, missing: FloatCompatible,
@ -698,6 +702,7 @@ def _is_modin_series(data: DataType) -> bool:
def _from_pandas_series( def _from_pandas_series(
*,
data: DataType, data: DataType,
missing: FloatCompatible, missing: FloatCompatible,
nthread: int, nthread: int,
@ -712,11 +717,11 @@ def _from_pandas_series(
if enable_categorical and is_pd_cat_dtype(data.dtype): if enable_categorical and is_pd_cat_dtype(data.dtype):
data = data.cat.codes data = data.cat.codes
return _from_numpy_array( return _from_numpy_array(
data.values.reshape(data.shape[0], 1).astype("float"), data=data.values.reshape(data.shape[0], 1).astype("float"),
missing, missing=missing,
nthread, nthread=nthread,
feature_names, feature_names=feature_names,
feature_types, feature_types=feature_types,
) )
@ -768,6 +773,7 @@ def _transform_dt_df(
def _from_dt_df( def _from_dt_df(
*,
data: DataType, data: DataType,
missing: Optional[FloatCompatible], missing: Optional[FloatCompatible],
nthread: int, nthread: int,
@ -778,7 +784,11 @@ def _from_dt_df(
if enable_categorical: if enable_categorical:
raise ValueError("categorical data in datatable is not supported yet.") raise ValueError("categorical data in datatable is not supported yet.")
data, feature_names, feature_types = _transform_dt_df( data, feature_names, feature_types = _transform_dt_df(
data, feature_names, feature_types, None, None data=data,
feature_names=feature_names,
feature_types=feature_types,
meta=None,
meta_type=None,
) )
ptrs = (ctypes.c_void_p * data.ncols)() ptrs = (ctypes.c_void_p * data.ncols)()
@ -968,6 +978,7 @@ def _transform_cudf_df(
def _from_cudf_df( def _from_cudf_df(
*,
data: DataType, data: DataType,
missing: FloatCompatible, missing: FloatCompatible,
nthread: int, nthread: int,
@ -1095,6 +1106,7 @@ def _is_list(data: DataType) -> TypeGuard[list]:
def _from_list( def _from_list(
*,
data: Sequence, data: Sequence,
missing: FloatCompatible, missing: FloatCompatible,
n_threads: int, n_threads: int,
@ -1105,7 +1117,12 @@ def _from_list(
array = np.array(data) array = np.array(data)
_check_data_shape(data) _check_data_shape(data)
return _from_numpy_array( return _from_numpy_array(
array, missing, n_threads, feature_names, feature_types, data_split_mode data=array,
missing=missing,
nthread=n_threads,
feature_names=feature_names,
feature_types=feature_types,
data_split_mode=data_split_mode,
) )
@ -1114,6 +1131,7 @@ def _is_tuple(data: DataType) -> TypeGuard[tuple]:
def _from_tuple( def _from_tuple(
*,
data: Sequence, data: Sequence,
missing: FloatCompatible, missing: FloatCompatible,
n_threads: int, n_threads: int,
@ -1122,7 +1140,12 @@ def _from_tuple(
data_split_mode: DataSplitMode = DataSplitMode.ROW, data_split_mode: DataSplitMode = DataSplitMode.ROW,
) -> DispatchedDataBackendReturnType: ) -> DispatchedDataBackendReturnType:
return _from_list( return _from_list(
data, missing, n_threads, feature_names, feature_types, data_split_mode data=data,
missing=missing,
n_threads=n_threads,
feature_names=feature_names,
feature_types=feature_types,
data_split_mode=data_split_mode,
) )
@ -1153,6 +1176,7 @@ def _convert_unknown_data(data: DataType) -> DataType:
def dispatch_data_backend( def dispatch_data_backend(
*,
data: DataType, data: DataType,
missing: FloatCompatible, # Or Optional[Float] missing: FloatCompatible, # Or Optional[Float]
threads: int, threads: int,
@ -1166,34 +1190,59 @@ def dispatch_data_backend(
_check_data_shape(data) _check_data_shape(data)
if is_scipy_csr(data): if is_scipy_csr(data):
return _from_scipy_csr( return _from_scipy_csr(
data, missing, threads, feature_names, feature_types, data_split_mode data=data,
missing=missing,
nthread=threads,
feature_names=feature_names,
feature_types=feature_types,
data_split_mode=data_split_mode,
) )
if is_scipy_csc(data): if is_scipy_csc(data):
return _from_scipy_csc( return _from_scipy_csc(
data, missing, threads, feature_names, feature_types, data_split_mode data=data,
missing=missing,
nthread=threads,
feature_names=feature_names,
feature_types=feature_types,
data_split_mode=data_split_mode,
) )
if is_scipy_coo(data): if is_scipy_coo(data):
return _from_scipy_csr( return _from_scipy_csr(
data.tocsr(), data=data.tocsr(),
missing, missing=missing,
threads, nthread=threads,
feature_names, feature_names=feature_names,
feature_types, feature_types=feature_types,
data_split_mode, data_split_mode=data_split_mode,
) )
if _is_np_array_like(data): if _is_np_array_like(data):
return _from_numpy_array( return _from_numpy_array(
data, missing, threads, feature_names, feature_types, data_split_mode data=data,
missing=missing,
nthread=threads,
feature_names=feature_names,
feature_types=feature_types,
data_split_mode=data_split_mode,
) )
if _is_uri(data): if _is_uri(data):
return _from_uri(data, missing, feature_names, feature_types, data_split_mode) return _from_uri(data, missing, feature_names, feature_types, data_split_mode)
if _is_list(data): if _is_list(data):
return _from_list( return _from_list(
data, missing, threads, feature_names, feature_types, data_split_mode data=data,
missing=missing,
n_threads=threads,
feature_names=feature_names,
feature_types=feature_types,
data_split_mode=data_split_mode,
) )
if _is_tuple(data): if _is_tuple(data):
return _from_tuple( return _from_tuple(
data, missing, threads, feature_names, feature_types, data_split_mode data=data,
missing=missing,
n_threads=threads,
feature_names=feature_names,
feature_types=feature_types,
data_split_mode=data_split_mode,
) )
if _is_arrow(data): if _is_arrow(data):
data = _arrow_transform(data) data = _arrow_transform(data)
@ -1203,17 +1252,22 @@ def dispatch_data_backend(
data = pd.DataFrame(data) data = pd.DataFrame(data)
if _is_pandas_df(data): if _is_pandas_df(data):
return _from_pandas_df( return _from_pandas_df(
data, data=data,
enable_categorical, enable_categorical=enable_categorical,
missing, missing=missing,
threads, nthread=threads,
feature_names, feature_names=feature_names,
feature_types, feature_types=feature_types,
data_split_mode, data_split_mode=data_split_mode,
) )
if _is_cudf_df(data) or _is_cudf_ser(data): if _is_cudf_df(data) or _is_cudf_ser(data):
return _from_cudf_df( return _from_cudf_df(
data, missing, threads, feature_names, feature_types, enable_categorical data=data,
missing=missing,
nthread=threads,
feature_names=feature_names,
feature_types=feature_types,
enable_categorical=enable_categorical,
) )
if _is_cupy_alike(data): if _is_cupy_alike(data):
return _from_cupy_array(data, missing, threads, feature_names, feature_types) return _from_cupy_array(data, missing, threads, feature_names, feature_types)
@ -1226,24 +1280,49 @@ def dispatch_data_backend(
if _is_dt_df(data): if _is_dt_df(data):
_warn_unused_missing(data, missing) _warn_unused_missing(data, missing)
return _from_dt_df( return _from_dt_df(
data, missing, threads, feature_names, feature_types, enable_categorical data=data,
missing=missing,
nthread=threads,
feature_names=feature_names,
feature_types=feature_types,
enable_categorical=enable_categorical,
) )
if _is_modin_df(data): if _is_modin_df(data):
return _from_pandas_df( return _from_pandas_df(
data, enable_categorical, missing, threads, feature_names, feature_types data=data,
enable_categorical=enable_categorical,
missing=missing,
nthread=threads,
feature_names=feature_names,
feature_types=feature_types,
) )
if _is_modin_series(data): if _is_modin_series(data):
return _from_pandas_series( return _from_pandas_series(
data, missing, threads, enable_categorical, feature_names, feature_types data=data,
missing=missing,
nthread=threads,
enable_categorical=enable_categorical,
feature_names=feature_names,
feature_types=feature_types,
) )
if _has_array_protocol(data): if _has_array_protocol(data):
array = np.asarray(data) array = np.asarray(data)
return _from_numpy_array(array, missing, threads, feature_names, feature_types) return _from_numpy_array(
data=array,
missing=missing,
nthread=threads,
feature_names=feature_names,
feature_types=feature_types,
)
converted = _convert_unknown_data(data) converted = _convert_unknown_data(data)
if converted is not None: if converted is not None:
return _from_scipy_csr( return _from_scipy_csr(
converted, missing, threads, feature_names, feature_types data=converted,
missing=missing,
nthread=threads,
feature_names=feature_names,
feature_types=feature_types,
) )
raise TypeError("Not supported type for data." + str(type(data))) raise TypeError("Not supported type for data." + str(type(data)))
@ -1313,7 +1392,9 @@ def _meta_from_cupy_array(data: DataType, field: str, handle: ctypes.c_void_p) -
def _meta_from_dt( def _meta_from_dt(
data: DataType, field: str, dtype: Optional[NumpyDType], handle: ctypes.c_void_p data: DataType, field: str, dtype: Optional[NumpyDType], handle: ctypes.c_void_p
) -> None: ) -> None:
data, _, _ = _transform_dt_df(data, None, None, field, dtype) data, _, _ = _transform_dt_df(
data=data, feature_names=None, feature_types=None, meta=field, meta_type=dtype
)
_meta_from_numpy(data, field, dtype, handle) _meta_from_numpy(data, field, dtype, handle)

View File

@ -4,7 +4,7 @@ import ctypes
from threading import Thread from threading import Thread
from typing import Any, Dict, Optional from typing import Any, Dict, Optional
from .core import _LIB, _check_call, make_jcargs from .core import _LIB, _check_call, _deprecate_positional_args, make_jcargs
from .tracker import RabitTracker from .tracker import RabitTracker
@ -34,10 +34,12 @@ class FederatedTracker(RabitTracker):
""" """
@_deprecate_positional_args
def __init__( # pylint: disable=R0913, W0231 def __init__( # pylint: disable=R0913, W0231
self, self,
n_workers: int, n_workers: int,
port: int, port: int,
*,
secure: bool, secure: bool,
server_key_path: Optional[str] = None, server_key_path: Optional[str] = None,
server_cert_path: Optional[str] = None, server_cert_path: Optional[str] = None,
@ -59,9 +61,11 @@ class FederatedTracker(RabitTracker):
self.handle = handle self.handle = handle
@_deprecate_positional_args
def run_federated_server( # pylint: disable=too-many-arguments def run_federated_server( # pylint: disable=too-many-arguments
n_workers: int, n_workers: int,
port: int, port: int,
*,
server_key_path: Optional[str] = None, server_key_path: Optional[str] = None,
server_cert_path: Optional[str] = None, server_cert_path: Optional[str] = None,
client_cert_path: Optional[str] = None, client_cert_path: Optional[str] = None,

View File

@ -8,15 +8,17 @@ from typing import Any, Optional, Union
import numpy as np import numpy as np
from ._typing import PathLike from ._typing import PathLike
from .core import Booster from .core import Booster, _deprecate_positional_args
from .sklearn import XGBModel from .sklearn import XGBModel
Axes = Any # real type is matplotlib.axes.Axes Axes = Any # real type is matplotlib.axes.Axes
GraphvizSource = Any # real type is graphviz.Source GraphvizSource = Any # real type is graphviz.Source
@_deprecate_positional_args
def plot_importance( def plot_importance(
booster: Union[XGBModel, Booster, dict], booster: Union[XGBModel, Booster, dict],
*,
ax: Optional[Axes] = None, ax: Optional[Axes] = None,
height: float = 0.2, height: float = 0.2,
xlim: Optional[tuple] = None, xlim: Optional[tuple] = None,
@ -146,8 +148,10 @@ def plot_importance(
return ax return ax
@_deprecate_positional_args
def to_graphviz( def to_graphviz(
booster: Union[Booster, XGBModel], booster: Union[Booster, XGBModel],
*,
fmap: PathLike = "", fmap: PathLike = "",
num_trees: int = 0, num_trees: int = 0,
rankdir: Optional[str] = None, rankdir: Optional[str] = None,

View File

@ -582,6 +582,7 @@ Parameters
def _wrap_evaluation_matrices( def _wrap_evaluation_matrices(
*,
missing: float, missing: float,
X: Any, X: Any,
y: Any, y: Any,
@ -696,8 +697,10 @@ DEFAULT_N_ESTIMATORS = 100
) )
class XGBModel(XGBModelBase): class XGBModel(XGBModelBase):
# pylint: disable=too-many-arguments, too-many-instance-attributes, missing-docstring # pylint: disable=too-many-arguments, too-many-instance-attributes, missing-docstring
@_deprecate_positional_args
def __init__( def __init__(
self, self,
*,
max_depth: Optional[int] = None, max_depth: Optional[int] = None,
max_leaves: Optional[int] = None, max_leaves: Optional[int] = None,
max_bin: Optional[int] = None, max_bin: Optional[int] = None,
@ -1174,9 +1177,11 @@ class XGBModel(XGBModelBase):
iteration_range = (0, 0) iteration_range = (0, 0)
return iteration_range return iteration_range
@_deprecate_positional_args
def predict( def predict(
self, self,
X: ArrayLike, X: ArrayLike,
*,
output_margin: bool = False, output_margin: bool = False,
validate_features: bool = True, validate_features: bool = True,
base_margin: Optional[ArrayLike] = None, base_margin: Optional[ArrayLike] = None,
@ -1587,9 +1592,11 @@ class XGBClassifier(XGBModel, XGBClassifierBase):
"Fit gradient boosting model", "Fit gradient boosting classifier", 1 "Fit gradient boosting model", "Fit gradient boosting classifier", 1
) )
@_deprecate_positional_args
def predict( def predict(
self, self,
X: ArrayLike, X: ArrayLike,
*,
output_margin: bool = False, output_margin: bool = False,
validate_features: bool = True, validate_features: bool = True,
base_margin: Optional[ArrayLike] = None, base_margin: Optional[ArrayLike] = None,
@ -2070,9 +2077,11 @@ class XGBRanker(XGBModel, XGBRankerMixIn):
self._set_evaluation_result(evals_result) self._set_evaluation_result(evals_result)
return self return self
@_deprecate_positional_args
def predict( def predict(
self, self,
X: ArrayLike, X: ArrayLike,
*,
output_margin: bool = False, output_margin: bool = False,
validate_features: bool = True, validate_features: bool = True,
base_margin: Optional[ArrayLike] = None, base_margin: Optional[ArrayLike] = None,
@ -2081,9 +2090,9 @@ class XGBRanker(XGBModel, XGBRankerMixIn):
X, _ = _get_qid(X, None) X, _ = _get_qid(X, None)
return super().predict( return super().predict(
X, X,
output_margin, output_margin=output_margin,
validate_features, validate_features=validate_features,
base_margin, base_margin=base_margin,
iteration_range=iteration_range, iteration_range=iteration_range,
) )
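
Hedged sketch of the scikit-learn wrapper after this change: estimator hyper-parameters and predict() options are keyword-only, which also matches what sklearn's clone()/get_params() machinery already expects.

import numpy as np
import xgboost as xgb

X = np.random.rand(256, 8)
y = np.random.rand(256)

# Hyper-parameters by keyword, as before; a positional call such as
# XGBRegressor(6) now takes the deprecation-warning path instead of binding silently.
reg = xgb.XGBRegressor(max_depth=6, n_estimators=50, tree_method="hist")
reg.fit(X, y)
pred = reg.predict(X, output_margin=False)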

View File

@ -1072,11 +1072,11 @@ class _SparkXGBEstimator(Estimator, _SparkXGBParams, MLReadable, MLWritable):
with CommunicatorContext(context, **_rabit_args): with CommunicatorContext(context, **_rabit_args):
with xgboost.config_context(verbosity=verbosity): with xgboost.config_context(verbosity=verbosity):
dtrain, dvalid = create_dmatrix_from_partitions( dtrain, dvalid = create_dmatrix_from_partitions(
pandas_df_iter, iterator=pandas_df_iter,
feature_prop.features_cols_names, feature_cols=feature_prop.features_cols_names,
dev_ordinal, dev_ordinal=dev_ordinal,
use_qdm, use_qdm=use_qdm,
dmatrix_kwargs, kwargs=dmatrix_kwargs,
enable_sparse_data_optim=feature_prop.enable_sparse_data_optim, enable_sparse_data_optim=feature_prop.enable_sparse_data_optim,
has_validation_col=feature_prop.has_validation_col, has_validation_col=feature_prop.has_validation_col,
) )

View File

@ -171,6 +171,7 @@ def make_qdm(
def create_dmatrix_from_partitions( # pylint: disable=too-many-arguments def create_dmatrix_from_partitions( # pylint: disable=too-many-arguments
*,
iterator: Iterator[pd.DataFrame], iterator: Iterator[pd.DataFrame],
feature_cols: Optional[Sequence[str]], feature_cols: Optional[Sequence[str]],
dev_ordinal: Optional[int], dev_ordinal: Optional[int],

View File

@ -224,6 +224,7 @@ class IteratorForTest(xgb.core.DataIter):
X: Sequence, X: Sequence,
y: Sequence, y: Sequence,
w: Optional[Sequence], w: Optional[Sequence],
*,
cache: Optional[str], cache: Optional[str],
on_host: bool = False, on_host: bool = False,
) -> None: ) -> None:
@ -379,6 +380,7 @@ def make_categorical(
n_samples: int, n_samples: int,
n_features: int, n_features: int,
n_categories: int, n_categories: int,
*,
onehot: bool, onehot: bool,
sparsity: float = 0.0, sparsity: float = 0.0,
cat_ratio: float = 1.0, cat_ratio: float = 1.0,
@ -487,7 +489,9 @@ def _cat_sampled_from() -> strategies.SearchStrategy:
sparsity = args[3] sparsity = args[3]
return TestDataset( return TestDataset(
f"{n_samples}x{n_features}-{n_cats}-{sparsity}", f"{n_samples}x{n_features}-{n_cats}-{sparsity}",
lambda: make_categorical(n_samples, n_features, n_cats, False, sparsity), lambda: make_categorical(
n_samples, n_features, n_cats, onehot=False, sparsity=sparsity
),
"reg:squarederror", "reg:squarederror",
"rmse", "rmse",
) )

View File

@ -22,7 +22,7 @@ def run_mixed_sparsity(device: str) -> None:
X = [cp.array(batch) for batch in X] X = [cp.array(batch) for batch in X]
it = tm.IteratorForTest(X, y, None, None, on_host=False) it = tm.IteratorForTest(X, y, None, cache=None, on_host=False)
Xy_0 = xgboost.QuantileDMatrix(it) Xy_0 = xgboost.QuantileDMatrix(it)
X_1, y_1 = tm.make_sparse_regression(256, 16, 0.1, True) X_1, y_1 = tm.make_sparse_regression(256, 16, 0.1, True)

View File

@ -52,6 +52,7 @@ def validate_data_initialization(
# pylint: disable=too-many-arguments,too-many-locals # pylint: disable=too-many-arguments,too-many-locals
def get_feature_weights( def get_feature_weights(
*,
X: ArrayLike, X: ArrayLike,
y: ArrayLike, y: ArrayLike,
fw: np.ndarray, fw: np.ndarray,

View File

@ -291,7 +291,9 @@ def check_get_quantile_cut_device(tree_method: str, use_cupy: bool) -> None:
# categorical # categorical
n_categories = 32 n_categories = 32
X, y = tm.make_categorical(n_samples, n_features, n_categories, False, sparsity=0.8) X, y = tm.make_categorical(
n_samples, n_features, n_categories, onehot=False, sparsity=0.8
)
if use_cupy: if use_cupy:
import cudf # pylint: disable=import-error import cudf # pylint: disable=import-error
import cupy as cp # pylint: disable=import-error import cupy as cp # pylint: disable=import-error
@ -310,7 +312,7 @@ def check_get_quantile_cut_device(tree_method: str, use_cupy: bool) -> None:
# mixed # mixed
X, y = tm.make_categorical( X, y = tm.make_categorical(
n_samples, n_features, n_categories, False, sparsity=0.8, cat_ratio=0.5 n_samples, n_features, n_categories, onehot=False, sparsity=0.8, cat_ratio=0.5
) )
n_cat_features = len([0 for dtype in X.dtypes if is_pd_cat_dtype(dtype)]) n_cat_features = len([0 for dtype in X.dtypes if is_pd_cat_dtype(dtype)])
n_num_features = n_features - n_cat_features n_num_features = n_features - n_cat_features
@ -340,12 +342,12 @@ USE_PART = 1
def check_categorical_ohe( # pylint: disable=too-many-arguments def check_categorical_ohe( # pylint: disable=too-many-arguments
rows: int, cols: int, rounds: int, cats: int, device: str, tree_method: str *, rows: int, cols: int, rounds: int, cats: int, device: str, tree_method: str
) -> None: ) -> None:
"Test for one-hot encoding with categorical data." "Test for one-hot encoding with categorical data."
onehot, label = tm.make_categorical(rows, cols, cats, True) onehot, label = tm.make_categorical(rows, cols, cats, onehot=True)
cat, _ = tm.make_categorical(rows, cols, cats, False) cat, _ = tm.make_categorical(rows, cols, cats, onehot=False)
by_etl_results: Dict[str, Dict[str, List[float]]] = {} by_etl_results: Dict[str, Dict[str, List[float]]] = {}
by_builtin_results: Dict[str, Dict[str, List[float]]] = {} by_builtin_results: Dict[str, Dict[str, List[float]]] = {}

View File

@ -6,7 +6,7 @@ import socket
from enum import IntEnum, unique from enum import IntEnum, unique
from typing import Dict, Optional, Union from typing import Dict, Optional, Union
from .core import _LIB, _check_call, make_jcargs from .core import _LIB, _check_call, _deprecate_positional_args, make_jcargs
def get_family(addr: str) -> int: def get_family(addr: str) -> int:
@ -48,11 +48,13 @@ class RabitTracker:
HOST = 0 HOST = 0
TASK = 1 TASK = 1
@_deprecate_positional_args
def __init__( # pylint: disable=too-many-arguments def __init__( # pylint: disable=too-many-arguments
self, self,
n_workers: int, n_workers: int,
host_ip: Optional[str], host_ip: Optional[str],
port: int = 0, port: int = 0,
*,
sortby: str = "host", sortby: str = "host",
timeout: int = 0, timeout: int = 0,
) -> None: ) -> None:

View File

@ -288,6 +288,7 @@ def groups_to_rows(groups: np.ndarray, boundaries: np.ndarray) -> np.ndarray:
def mkgroupfold( def mkgroupfold(
*,
dall: DMatrix, dall: DMatrix,
nfold: int, nfold: int,
param: BoosterParam, param: BoosterParam,
@ -341,6 +342,7 @@ def mkgroupfold(
def mknfold( def mknfold(
*,
dall: DMatrix, dall: DMatrix,
nfold: int, nfold: int,
param: BoosterParam, param: BoosterParam,
@ -361,7 +363,12 @@ def mknfold(
# Do standard k-fold cross validation. Automatically determine the folds. # Do standard k-fold cross validation. Automatically determine the folds.
if len(dall.get_uint_info("group_ptr")) > 1: if len(dall.get_uint_info("group_ptr")) > 1:
return mkgroupfold( return mkgroupfold(
dall, nfold, param, evals=evals, fpreproc=fpreproc, shuffle=shuffle dall=dall,
nfold=nfold,
param=param,
evals=evals,
fpreproc=fpreproc,
shuffle=shuffle,
) )
if shuffle is True: if shuffle is True:
@ -407,10 +414,12 @@ def mknfold(
return ret return ret
@_deprecate_positional_args
def cv( def cv(
params: BoosterParam, params: BoosterParam,
dtrain: DMatrix, dtrain: DMatrix,
num_boost_round: int = 10, num_boost_round: int = 10,
*,
nfold: int = 3, nfold: int = 3,
stratified: bool = False, stratified: bool = False,
folds: XGBStratifiedKFold = None, folds: XGBStratifiedKFold = None,
@ -541,7 +550,15 @@ def cv(
results: Dict[str, List[float]] = {} results: Dict[str, List[float]] = {}
cvfolds = mknfold( cvfolds = mknfold(
dtrain, nfold, params, seed, metrics, fpreproc, stratified, folds, shuffle dall=dtrain,
nfold=nfold,
param=params,
seed=seed,
evals=metrics,
fpreproc=fpreproc,
stratified=stratified,
folds=folds,
shuffle=shuffle,
) )
metric_fn = _configure_custom_metric(feval, custom_metric) metric_fn = _configure_custom_metric(feval, custom_metric)
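
Hedged usage sketch for the cross-validation entry point: xgb.cv keeps params, dtrain and num_boost_round positional, and everything from nfold onwards must be a keyword argument.

import numpy as np
import xgboost as xgb

rng = np.random.default_rng(0)
dtrain = xgb.DMatrix(rng.random((200, 5)), label=rng.random(200))

res = xgb.cv(
    {"objective": "reg:squarederror", "tree_method": "hist"},
    dtrain,
    num_boost_round=10,
    nfold=3,          # keyword-only from here on
    metrics="rmse",
    seed=0,
)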

View File

@ -32,6 +32,7 @@ class LintersPaths:
"tests/python/test_tree_regularization.py", "tests/python/test_tree_regularization.py",
"tests/python/test_training_continuation.py", "tests/python/test_training_continuation.py",
"tests/python/test_shap.py", "tests/python/test_shap.py",
"tests/python/test_updaters.py",
"tests/python/test_model_io.py", "tests/python/test_model_io.py",
"tests/python/test_with_pandas.py", "tests/python/test_with_pandas.py",
"tests/python-gpu/", "tests/python-gpu/",

View File

@ -195,7 +195,7 @@ class TestFromColumnar:
@pytest.mark.skipif(**tm.no_cudf()) @pytest.mark.skipif(**tm.no_cudf())
def test_cudf_categorical(self) -> None: def test_cudf_categorical(self) -> None:
n_features = 30 n_features = 30
_X, _y = tm.make_categorical(100, n_features, 17, False) _X, _y = tm.make_categorical(100, n_features, 17, onehot=False)
X = cudf.from_pandas(_X) X = cudf.from_pandas(_X)
y = cudf.from_pandas(_y) y = cudf.from_pandas(_y)
@ -312,7 +312,7 @@ class IterForDMatrixTest(xgb.core.DataIter):
self._data = [] self._data = []
self._labels = [] self._labels = []
for i in range(self.BATCHES): for i in range(self.BATCHES):
X, y = tm.make_categorical(self.ROWS_PER_BATCH, 4, 13, False) X, y = tm.make_categorical(self.ROWS_PER_BATCH, 4, 13, onehot=False)
self._data.append(cudf.from_pandas(X)) self._data.append(cudf.from_pandas(X))
self._labels.append(y) self._labels.append(y)
else: else:

View File

@ -405,7 +405,7 @@ class TestGPUPredict:
) )
def test_shap_categorical(self): def test_shap_categorical(self):
X, y = tm.make_categorical(100, 20, 7, False) X, y = tm.make_categorical(100, 20, 7, onehot=False)
Xy = xgb.DMatrix(X, y, enable_categorical=True) Xy = xgb.DMatrix(X, y, enable_categorical=True)
booster = xgb.train( booster = xgb.train(
{"tree_method": "hist", "device": "gpu:0"}, Xy, num_boost_round=10 {"tree_method": "hist", "device": "gpu:0"}, Xy, num_boost_round=10

View File

@ -140,7 +140,14 @@ class TestGPUUpdaters:
@settings(deadline=None, max_examples=20, print_blob=True) @settings(deadline=None, max_examples=20, print_blob=True)
@pytest.mark.skipif(**tm.no_pandas()) @pytest.mark.skipif(**tm.no_pandas())
def test_categorical_ohe(self, rows, cols, rounds, cats): def test_categorical_ohe(self, rows, cols, rounds, cats):
check_categorical_ohe(rows, cols, rounds, cats, "cuda", "hist") check_categorical_ohe(
rows=rows,
cols=cols,
rounds=rounds,
cats=cats,
device="cuda",
tree_method="hist",
)
@given( @given(
tm.categorical_dataset_strategy, tm.categorical_dataset_strategy,
@ -222,10 +229,9 @@ class TestGPUUpdaters:
def test_categorical_32_cat(self): def test_categorical_32_cat(self):
"""32 hits the bound of integer bitset, so special test""" """32 hits the bound of integer bitset, so special test"""
rows = 1000 rows = 1000
cols = 10 check_categorical_ohe(
cats = 32 rows=rows, cols=10, rounds=4, cats=32, device="cuda", tree_method="hist"
rounds = 4 )
check_categorical_ohe(rows, cols, rounds, cats, "cuda", "hist")
@pytest.mark.skipif(**tm.no_cupy()) @pytest.mark.skipif(**tm.no_cupy())
def test_invalid_category(self): def test_invalid_category(self):

View File

@ -104,7 +104,7 @@ class TestBoosterIO:
self.run_model_json_io(parameters, ext) self.run_model_json_io(parameters, ext)
def test_categorical_model_io(self) -> None: def test_categorical_model_io(self) -> None:
X, y = tm.make_categorical(256, 16, 71, False) X, y = tm.make_categorical(256, 16, 71, onehot=False)
Xy = xgb.DMatrix(X, y, enable_categorical=True) Xy = xgb.DMatrix(X, y, enable_categorical=True)
booster = xgb.train({"tree_method": "approx"}, Xy, num_boost_round=16) booster = xgb.train({"tree_method": "approx"}, Xy, num_boost_round=16)
predt_0 = booster.predict(Xy) predt_0 = booster.predict(Xy)

View File

@ -49,7 +49,7 @@ class TestTreesToDataFrame:
assert np.allclose(cover_from_dump, cover_from_df) assert np.allclose(cover_from_dump, cover_from_df)
def run_tree_to_df_categorical(self, tree_method: str) -> None: def run_tree_to_df_categorical(self, tree_method: str) -> None:
X, y = tm.make_categorical(100, 10, 31, False) X, y = tm.make_categorical(100, 10, 31, onehot=False)
Xy = xgb.DMatrix(X, y, enable_categorical=True) Xy = xgb.DMatrix(X, y, enable_categorical=True)
booster = xgb.train({"tree_method": tree_method}, Xy, num_boost_round=10) booster = xgb.train({"tree_method": tree_method}, Xy, num_boost_round=10)
df = booster.trees_to_dataframe() df = booster.trees_to_dataframe()
@ -61,7 +61,7 @@ class TestTreesToDataFrame:
self.run_tree_to_df_categorical("approx") self.run_tree_to_df_categorical("approx")
def run_split_value_histograms(self, tree_method) -> None: def run_split_value_histograms(self, tree_method) -> None:
X, y = tm.make_categorical(1000, 10, 13, False) X, y = tm.make_categorical(1000, 10, 13, onehot=False)
reg = xgb.XGBRegressor(tree_method=tree_method, enable_categorical=True) reg = xgb.XGBRegressor(tree_method=tree_method, enable_categorical=True)
reg.fit(X, y) reg.fit(X, y)

View File

@ -97,14 +97,15 @@ class TestQuantileDMatrix:
if sparsity == 0.0: if sparsity == 0.0:
it = IteratorForTest( it = IteratorForTest(
*make_batches(n_samples_per_batch, n_features, n_batches, False), None *make_batches(n_samples_per_batch, n_features, n_batches, False),
cache=None,
) )
else: else:
it = IteratorForTest( it = IteratorForTest(
*make_batches_sparse( *make_batches_sparse(
n_samples_per_batch, n_features, n_batches, sparsity n_samples_per_batch, n_features, n_batches, sparsity
), ),
None, cache=None,
) )
Xy = xgb.QuantileDMatrix(it) Xy = xgb.QuantileDMatrix(it)
assert Xy.num_row() == n_samples_per_batch * n_batches assert Xy.num_row() == n_samples_per_batch * n_batches
@ -134,14 +135,15 @@ class TestQuantileDMatrix:
n_batches = 7 n_batches = 7
if sparsity == 0.0: if sparsity == 0.0:
it = IteratorForTest( it = IteratorForTest(
*make_batches(n_samples_per_batch, n_features, n_batches, False), None *make_batches(n_samples_per_batch, n_features, n_batches, False),
cache=None,
) )
else: else:
it = IteratorForTest( it = IteratorForTest(
*make_batches_sparse( *make_batches_sparse(
n_samples_per_batch, n_features, n_batches, sparsity n_samples_per_batch, n_features, n_batches, sparsity
), ),
None, cache=None,
) )
parameters = {"tree_method": "hist", "max_bin": 256} parameters = {"tree_method": "hist", "max_bin": 256}

View File

@ -81,23 +81,26 @@ class TestTreeMethod:
@pytest.mark.skipif(**tm.no_sklearn()) @pytest.mark.skipif(**tm.no_sklearn())
def test_pruner(self): def test_pruner(self):
import sklearn import sklearn
params = {'tree_method': 'exact'}
params = {"tree_method": "exact"}
cancer = sklearn.datasets.load_breast_cancer() cancer = sklearn.datasets.load_breast_cancer()
X = cancer['data'] X = cancer["data"]
y = cancer["target"] y = cancer["target"]
dtrain = xgb.DMatrix(X, y) dtrain = xgb.DMatrix(X, y)
booster = xgb.train(params, dtrain=dtrain, num_boost_round=10) booster = xgb.train(params, dtrain=dtrain, num_boost_round=10)
grown = str(booster.get_dump()) grown = str(booster.get_dump())
params = {'updater': 'prune', 'process_type': 'update', 'gamma': '0.2'} params = {"updater": "prune", "process_type": "update", "gamma": "0.2"}
booster = xgb.train(params, dtrain=dtrain, num_boost_round=10, booster = xgb.train(
xgb_model=booster) params, dtrain=dtrain, num_boost_round=10, xgb_model=booster
)
after_prune = str(booster.get_dump()) after_prune = str(booster.get_dump())
assert grown != after_prune assert grown != after_prune
booster = xgb.train(params, dtrain=dtrain, num_boost_round=10, booster = xgb.train(
xgb_model=booster) params, dtrain=dtrain, num_boost_round=10, xgb_model=booster
)
second_prune = str(booster.get_dump()) second_prune = str(booster.get_dump())
# Second prune should not change the tree # Second prune should not change the tree
assert after_prune == second_prune assert after_prune == second_prune
@ -107,11 +110,12 @@ class TestTreeMethod:
hist_parameter_strategy, hist_parameter_strategy,
hist_cache_strategy, hist_cache_strategy,
strategies.integers(1, 20), strategies.integers(1, 20),
tm.make_dataset_strategy() tm.make_dataset_strategy(),
) )
@settings(deadline=None, print_blob=True) @settings(deadline=None, print_blob=True)
def test_hist( def test_hist(
self, param: Dict[str, Any], self,
param: Dict[str, Any],
hist_param: Dict[str, Any], hist_param: Dict[str, Any],
cache_param: Dict[str, Any], cache_param: Dict[str, Any],
num_rounds: int, num_rounds: int,
@ -128,11 +132,13 @@ class TestTreeMethod:
def test_hist_categorical(self): def test_hist_categorical(self):
# hist must be same as exact on all-categorial data # hist must be same as exact on all-categorial data
ag_dtrain, ag_dtest = tm.load_agaricus(__file__) ag_dtrain, ag_dtest = tm.load_agaricus(__file__)
ag_param = {'max_depth': 2, ag_param = {
'tree_method': 'hist', "max_depth": 2,
'eta': 1, "tree_method": "hist",
'objective': 'binary:logistic', "eta": 1,
'eval_metric': 'auc'} "objective": "binary:logistic",
"eval_metric": "auc",
}
hist_res = {} hist_res = {}
exact_res = {} exact_res = {}
@ -141,7 +147,7 @@ class TestTreeMethod:
ag_dtrain, ag_dtrain,
10, 10,
evals=[(ag_dtrain, "train"), (ag_dtest, "test")], evals=[(ag_dtrain, "train"), (ag_dtest, "test")],
evals_result=hist_res evals_result=hist_res,
) )
ag_param["tree_method"] = "exact" ag_param["tree_method"] = "exact"
xgb.train( xgb.train(
@ -149,10 +155,10 @@ class TestTreeMethod:
ag_dtrain, ag_dtrain,
10, 10,
evals=[(ag_dtrain, "train"), (ag_dtest, "test")], evals=[(ag_dtrain, "train"), (ag_dtest, "test")],
evals_result=exact_res evals_result=exact_res,
) )
assert hist_res['train']['auc'] == exact_res['train']['auc'] assert hist_res["train"]["auc"] == exact_res["train"]["auc"]
assert hist_res['test']['auc'] == exact_res['test']['auc'] assert hist_res["test"]["auc"] == exact_res["test"]["auc"]
@pytest.mark.skipif(**tm.no_sklearn()) @pytest.mark.skipif(**tm.no_sklearn())
def test_hist_degenerate_case(self): def test_hist_degenerate_case(self):
@ -160,11 +166,17 @@ class TestTreeMethod:
# quantile points for a particular feature (the second feature in # quantile points for a particular feature (the second feature in
# this example). Source: https://github.com/dmlc/xgboost/issues/2943 # this example). Source: https://github.com/dmlc/xgboost/issues/2943
nan = np.nan nan = np.nan
param = {'missing': nan, 'tree_method': 'hist'} param = {"missing": nan, "tree_method": "hist"}
model = xgb.XGBRegressor(**param) model = xgb.XGBRegressor(**param)
X = np.array([[6.18827160e+05, 1.73000000e+02], [6.37345679e+05, nan], X = np.array(
[6.38888889e+05, nan], [6.28086420e+05, nan]]) [
y = [1000000., 0., 0., 500000.] [6.18827160e05, 1.73000000e02],
[6.37345679e05, nan],
[6.38888889e05, nan],
[6.28086420e05, nan],
]
)
y = [1000000.0, 0.0, 0.0, 500000.0]
w = [0, 0, 1, 0] w = [0, 0, 1, 0]
model.fit(X, y, sample_weight=w) model.fit(X, y, sample_weight=w)
@ -174,12 +186,12 @@ class TestTreeMethod:
param = {"tree_method": "hist", "max_bin": 64} param = {"tree_method": "hist", "max_bin": 64}
hist_result = train_result(param, dataset.get_dmat(), 16) hist_result = train_result(param, dataset.get_dmat(), 16)
note(str(hist_result)) note(str(hist_result))
assert tm.non_increasing(hist_result['train'][dataset.metric]) assert tm.non_increasing(hist_result["train"][dataset.metric])
param = {"tree_method": "approx", "max_bin": 64} param = {"tree_method": "approx", "max_bin": 64}
approx_result = train_result(param, dataset.get_dmat(), 16) approx_result = train_result(param, dataset.get_dmat(), 16)
note(str(approx_result)) note(str(approx_result))
assert tm.non_increasing(approx_result['train'][dataset.metric]) assert tm.non_increasing(approx_result["train"][dataset.metric])
np.testing.assert_allclose( np.testing.assert_allclose(
hist_result["train"]["rmse"], approx_result["train"]["rmse"] hist_result["train"]["rmse"], approx_result["train"]["rmse"]
@ -248,15 +260,33 @@ class TestTreeMethod:
def test_max_cat(self, tree_method) -> None: def test_max_cat(self, tree_method) -> None:
self.run_max_cat(tree_method) self.run_max_cat(tree_method)
@given(strategies.integers(10, 400), strategies.integers(3, 8), @given(
strategies.integers(1, 2), strategies.integers(4, 7)) strategies.integers(10, 400),
strategies.integers(3, 8),
strategies.integers(1, 2),
strategies.integers(4, 7),
)
@settings(deadline=None, print_blob=True) @settings(deadline=None, print_blob=True)
@pytest.mark.skipif(**tm.no_pandas()) @pytest.mark.skipif(**tm.no_pandas())
def test_categorical_ohe( def test_categorical_ohe(
self, rows: int, cols: int, rounds: int, cats: int self, rows: int, cols: int, rounds: int, cats: int
) -> None: ) -> None:
check_categorical_ohe(rows, cols, rounds, cats, "cpu", "approx") check_categorical_ohe(
check_categorical_ohe(rows, cols, rounds, cats, "cpu", "hist") rows=rows,
cols=cols,
rounds=rounds,
cats=cats,
device="cpu",
tree_method="approx",
)
check_categorical_ohe(
rows=rows,
cols=cols,
rounds=rounds,
cats=cats,
device="cpu",
tree_method="hist",
)
@given( @given(
tm.categorical_dataset_strategy, tm.categorical_dataset_strategy,
@ -307,7 +337,7 @@ class TestTreeMethod:
@given( @given(
strategies.integers(10, 400), strategies.integers(10, 400),
strategies.integers(3, 8), strategies.integers(3, 8),
strategies.integers(4, 7) strategies.integers(4, 7),
) )
@settings(deadline=None, print_blob=True) @settings(deadline=None, print_blob=True)
@pytest.mark.skipif(**tm.no_pandas()) @pytest.mark.skipif(**tm.no_pandas())
@ -395,9 +425,8 @@ class TestTreeMethod:
@pytest.mark.skipif(**tm.no_sklearn()) @pytest.mark.skipif(**tm.no_sklearn())
@pytest.mark.parametrize( @pytest.mark.parametrize(
"tree_method,weighted", [ "tree_method,weighted",
("approx", False), ("hist", False), ("approx", True), ("hist", True) [("approx", False), ("hist", False), ("approx", True), ("hist", True)],
]
) )
def test_adaptive(self, tree_method, weighted) -> None: def test_adaptive(self, tree_method, weighted) -> None:
self.run_adaptive(tree_method, weighted) self.run_adaptive(tree_method, weighted)

View File

@ -1161,14 +1161,24 @@ def test_feature_weights(tree_method):
parser_path = os.path.join(tm.demo_dir(__file__), "json-model", "json_parser.py") parser_path = os.path.join(tm.demo_dir(__file__), "json-model", "json_parser.py")
poly_increasing = get_feature_weights( poly_increasing = get_feature_weights(
X, y, fw, parser_path, tree_method, xgb.XGBRegressor X=X,
y=y,
fw=fw,
parser_path=parser_path,
tree_method=tree_method,
model=xgb.XGBRegressor,
) )
fw = np.ones(shape=(kCols,)) fw = np.ones(shape=(kCols,))
for i in range(kCols): for i in range(kCols):
fw[i] *= float(kCols - i) fw[i] *= float(kCols - i)
poly_decreasing = get_feature_weights( poly_decreasing = get_feature_weights(
X, y, fw, parser_path, tree_method, xgb.XGBRegressor X=X,
y=y,
fw=fw,
parser_path=parser_path,
tree_method=tree_method,
model=xgb.XGBRegressor,
) )
# Approxmated test, this is dependent on the implementation of random # Approxmated test, this is dependent on the implementation of random

View File

@ -359,7 +359,7 @@ def run_categorical(
def test_categorical(client: "Client") -> None: def test_categorical(client: "Client") -> None:
X, y = make_categorical(client, 10000, 30, 13) X, y = make_categorical(client, 10000, 30, 13)
X_onehot, _ = make_categorical(client, 10000, 30, 13, True) X_onehot, _ = make_categorical(client, 10000, 30, 13, onehot=True)
run_categorical(client, "approx", "cpu", X, X_onehot, y) run_categorical(client, "approx", "cpu", X, X_onehot, y)
run_categorical(client, "hist", "cpu", X, X_onehot, y) run_categorical(client, "hist", "cpu", X, X_onehot, y)
@ -1335,7 +1335,7 @@ class TestWithDask:
def save_dmatrix(rabit_args: Dict[str, Union[int, str]], tmpdir: str) -> None: def save_dmatrix(rabit_args: Dict[str, Union[int, str]], tmpdir: str) -> None:
with xgb.dask.CommunicatorContext(**rabit_args): with xgb.dask.CommunicatorContext(**rabit_args):
rank = xgb.collective.get_rank() rank = xgb.collective.get_rank()
X, y = tm.make_categorical(100, 4, 4, False) X, y = tm.make_categorical(100, 4, 4, onehot=False)
Xy = xgb.DMatrix(X, y, enable_categorical=True) Xy = xgb.DMatrix(X, y, enable_categorical=True)
path = os.path.join(tmpdir, f"{rank}.bin") path = os.path.join(tmpdir, f"{rank}.bin")
Xy.save_binary(path) Xy.save_binary(path)
@ -1665,7 +1665,12 @@ class TestWithDask:
fw = da.from_array(fw) fw = da.from_array(fw)
parser = os.path.join(tm.demo_dir(__file__), "json-model", "json_parser.py") parser = os.path.join(tm.demo_dir(__file__), "json-model", "json_parser.py")
poly_increasing = get_feature_weights( poly_increasing = get_feature_weights(
X, y, fw, parser, "approx", model=xgb.dask.DaskXGBRegressor X=X,
y=y,
fw=fw,
parser_path=parser,
tree_method="approx",
model=xgb.dask.DaskXGBRegressor,
) )
fw = np.ones(shape=(kCols,)) fw = np.ones(shape=(kCols,))
@ -1673,7 +1678,12 @@ class TestWithDask:
fw[i] *= float(kCols - i) fw[i] *= float(kCols - i)
fw = da.from_array(fw) fw = da.from_array(fw)
poly_decreasing = get_feature_weights( poly_decreasing = get_feature_weights(
X, y, fw, parser, "approx", model=xgb.dask.DaskXGBRegressor X=X,
y=y,
fw=fw,
parser_path=parser,
tree_method="approx",
model=xgb.dask.DaskXGBRegressor,
) )
# Approxmated test, this is dependent on the implementation of random # Approxmated test, this is dependent on the implementation of random

View File

@ -67,8 +67,8 @@ def run_dmatrix_ctor(is_feature_cols: bool, is_qdm: bool, on_gpu: bool) -> None:
cols = [f"feat-{i}" for i in range(n_features)] cols = [f"feat-{i}" for i in range(n_features)]
feature_cols = cols if is_feature_cols else None feature_cols = cols if is_feature_cols else None
train_Xy, valid_Xy = create_dmatrix_from_partitions( train_Xy, valid_Xy = create_dmatrix_from_partitions(
iter(dfs), iterator=iter(dfs),
feature_cols, feature_cols=feature_cols,
dev_ordinal=device_id, dev_ordinal=device_id,
use_qdm=is_qdm, use_qdm=is_qdm,
kwargs=kwargs, kwargs=kwargs,