[CI] Fix PyLint errors. (#10837)

This commit is contained in:
Jiaming Yuan
2024-09-24 14:09:32 +08:00
committed by GitHub
parent 982ee34658
commit 68a8865bc5
26 changed files with 349 additions and 118 deletions

View File

@@ -23,7 +23,13 @@ from typing import (
import numpy
from . import collective
from .core import Booster, DMatrix, XGBoostError, _parse_eval_str
from .core import (
Booster,
DMatrix,
XGBoostError,
_deprecate_positional_args,
_parse_eval_str,
)
__all__ = [
"TrainingCallback",
@@ -346,8 +352,10 @@ class EarlyStopping(TrainingCallback):
"""
# pylint: disable=too-many-arguments
@_deprecate_positional_args
def __init__(
self,
*,
rounds: int,
metric_name: Optional[str] = None,
data_name: Optional[str] = None,
@@ -375,7 +383,7 @@ class EarlyStopping(TrainingCallback):
return model
def _update_rounds(
self, score: _Score, name: str, metric: str, model: _Model, epoch: int
self, *, score: _Score, name: str, metric: str, model: _Model, epoch: int
) -> bool:
def get_s(value: _Score) -> float:
"""get score if it's cross validation history."""
@@ -471,7 +479,9 @@ class EarlyStopping(TrainingCallback):
# The latest score
score = data_log[metric_name][-1]
return self._update_rounds(score, data_name, metric_name, model, epoch)
return self._update_rounds(
score=score, name=data_name, metric=metric_name, model=model, epoch=epoch
)
def after_training(self, model: _Model) -> _Model:
if not self.save_best:

View File

@@ -907,7 +907,7 @@ class DMatrix: # pylint: disable=too-many-instance-attributes,too-many-public-m
return
handle, feature_names, feature_types = dispatch_data_backend(
data,
data=data,
missing=self.missing,
threads=self.nthread,
feature_names=feature_names,
@@ -1697,6 +1697,7 @@ class ExtMemQuantileDMatrix(DMatrix):
def __init__( # pylint: disable=super-init-not-called
self,
data: DataIter,
*,
missing: Optional[float] = None,
nthread: Optional[int] = None,
max_bin: Optional[int] = None,
@@ -2355,9 +2356,11 @@ class Booster:
return self.eval_set([(data, name)], iteration)
# pylint: disable=too-many-function-args
@_deprecate_positional_args
def predict(
self,
data: DMatrix,
*,
output_margin: bool = False,
pred_leaf: bool = False,
pred_contribs: bool = False,
@@ -2490,9 +2493,11 @@ class Booster:
return _prediction_output(shape, dims, preds, False)
# pylint: disable=too-many-statements
@_deprecate_positional_args
def inplace_predict(
self,
data: DataType,
*,
iteration_range: IterationRange = (0, 0),
predict_type: str = "value",
missing: float = np.nan,

View File

@@ -339,8 +339,8 @@ class DaskDMatrix:
self._init = client.sync(
self._map_local_data,
client,
data,
client=client,
data=data,
label=label,
weights=weight,
base_margin=base_margin,
@@ -355,6 +355,7 @@ class DaskDMatrix:
async def _map_local_data(
self,
*,
client: "distributed.Client",
data: _DataT,
label: Optional[_DaskCollection] = None,
@@ -589,6 +590,7 @@ class DaskPartitionIter(DataIter): # pylint: disable=R0902
self,
data: List[Any],
label: Optional[List[Any]] = None,
*,
weight: Optional[List[Any]] = None,
base_margin: Optional[List[Any]] = None,
qid: Optional[List[Any]] = None,
@@ -712,6 +714,7 @@ class DaskQuantileDMatrix(DaskDMatrix):
def _create_quantile_dmatrix(
*,
feature_names: Optional[FeatureNames],
feature_types: Optional[Union[Any, List[Any]]],
feature_weights: Optional[Any],
@@ -757,6 +760,7 @@ def _create_quantile_dmatrix(
def _create_dmatrix(
*,
feature_names: Optional[FeatureNames],
feature_types: Optional[Union[Any, List[Any]]],
feature_weights: Optional[Any],
@@ -927,6 +931,7 @@ def _get_dmatrices(
async def _train_async(
*,
client: "distributed.Client",
global_config: Dict[str, Any],
dconfig: Optional[Dict[str, Any]],
@@ -947,7 +952,7 @@ async def _train_async(
_rabit_args = await _get_rabit_args(len(workers), dconfig, client)
_check_distributed_params(params)
def dispatched_train(
def dispatched_train( # pylint: disable=too-many-positional-arguments
parameters: Dict,
rabit_args: Dict[str, Union[str, int]],
train_id: int,
@@ -1115,6 +1120,7 @@ def _maybe_dataframe(
async def _direct_predict_impl( # pylint: disable=too-many-branches
*,
mapped_predict: Callable,
booster: "distributed.Future",
data: _DataT,
@@ -1249,6 +1255,7 @@ async def _predict_async(
global_config: Dict[str, Any],
model: Union[Booster, Dict, "distributed.Future"],
data: _DataT,
*,
output_margin: bool,
missing: float,
pred_leaf: bool,
@@ -1304,7 +1311,12 @@ async def _predict_async(
)
)
return await _direct_predict_impl(
mapped_predict, _booster, data, None, _output_shape, meta
mapped_predict=mapped_predict,
booster=_booster,
data=data,
base_margin=None,
output_shape=_output_shape,
meta=meta,
)
output_shape, _ = await client.compute(
@@ -1392,10 +1404,12 @@ async def _predict_async(
return predictions
@_deprecate_positional_args
def predict( # pylint: disable=unused-argument
client: Optional["distributed.Client"],
model: Union[TrainReturnT, Booster, "distributed.Future"],
data: Union[DaskDMatrix, _DataT],
*,
output_margin: bool = False,
missing: float = numpy.nan,
pred_leaf: bool = False,
@@ -1447,6 +1461,7 @@ def predict( # pylint: disable=unused-argument
async def _inplace_predict_async( # pylint: disable=too-many-branches
*,
client: "distributed.Client",
global_config: Dict[str, Any],
model: Union[Booster, Dict, "distributed.Future"],
@@ -1501,14 +1516,21 @@ async def _inplace_predict_async( # pylint: disable=too-many-branches
)
)
return await _direct_predict_impl(
mapped_predict, booster, data, base_margin, shape, meta
mapped_predict=mapped_predict,
booster=booster,
data=data,
base_margin=base_margin,
output_shape=shape,
meta=meta,
)
@_deprecate_positional_args
def inplace_predict( # pylint: disable=unused-argument
client: Optional["distributed.Client"],
model: Union[TrainReturnT, Booster, "distributed.Future"],
data: _DataT,
*,
iteration_range: IterationRange = (0, 0),
predict_type: str = "value",
missing: float = numpy.nan,
@@ -1615,6 +1637,7 @@ class DaskScikitLearnBase(XGBModel):
async def _predict_async(
self,
data: _DataT,
*,
output_margin: bool,
validate_features: bool,
base_margin: Optional[_DaskCollection],
@@ -1652,9 +1675,11 @@ class DaskScikitLearnBase(XGBModel):
)
return predts
@_deprecate_positional_args
def predict(
self,
X: _DataT,
*,
output_margin: bool = False,
validate_features: bool = True,
base_margin: Optional[_DaskCollection] = None,
@@ -1765,6 +1790,7 @@ class DaskXGBRegressor(DaskScikitLearnBase, XGBRegressorBase):
self,
X: _DataT,
y: _DaskCollection,
*,
sample_weight: Optional[_DaskCollection],
base_margin: Optional[_DaskCollection],
eval_set: Optional[Sequence[Tuple[_DaskCollection, _DaskCollection]]],
@@ -1855,6 +1881,7 @@ class DaskXGBClassifier(DaskScikitLearnBase, XGBClassifierBase):
self,
X: _DataT,
y: _DaskCollection,
*,
sample_weight: Optional[_DaskCollection],
base_margin: Optional[_DaskCollection],
eval_set: Optional[Sequence[Tuple[_DaskCollection, _DaskCollection]]],
@@ -1999,13 +2026,18 @@ class DaskXGBClassifier(DaskScikitLearnBase, XGBClassifierBase):
async def _predict_async(
self,
data: _DataT,
*,
output_margin: bool,
validate_features: bool,
base_margin: Optional[_DaskCollection],
iteration_range: Optional[IterationRange],
) -> _DaskCollection:
pred_probs = await super()._predict_async(
data, output_margin, validate_features, base_margin, iteration_range
data,
output_margin=output_margin,
validate_features=validate_features,
base_margin=base_margin,
iteration_range=iteration_range,
)
if output_margin:
return pred_probs
@@ -2049,6 +2081,7 @@ class DaskXGBRanker(DaskScikitLearnBase, XGBRankerMixIn):
self,
X: _DataT,
y: _DaskCollection,
*,
group: Optional[_DaskCollection],
qid: Optional[_DaskCollection],
sample_weight: Optional[_DaskCollection],

View File

@@ -128,6 +128,7 @@ def transform_scipy_sparse(data: DataType, is_csr: bool) -> DataType:
def _from_scipy_csr(
*,
data: DataType,
missing: FloatCompatible,
nthread: int,
@@ -176,6 +177,7 @@ def is_scipy_csc(data: DataType) -> bool:
def _from_scipy_csc(
*,
data: DataType,
missing: FloatCompatible,
nthread: int,
@@ -251,6 +253,7 @@ def _maybe_np_slice(data: DataType, dtype: Optional[NumpyDType]) -> np.ndarray:
def _from_numpy_array(
*,
data: np.ndarray,
missing: FloatCompatible,
nthread: int,
@@ -639,6 +642,7 @@ def _meta_from_pandas_df(
def _from_pandas_df(
*,
data: DataFrame,
enable_categorical: bool,
missing: FloatCompatible,
@@ -698,6 +702,7 @@ def _is_modin_series(data: DataType) -> bool:
def _from_pandas_series(
*,
data: DataType,
missing: FloatCompatible,
nthread: int,
@@ -712,11 +717,11 @@ def _from_pandas_series(
if enable_categorical and is_pd_cat_dtype(data.dtype):
data = data.cat.codes
return _from_numpy_array(
data.values.reshape(data.shape[0], 1).astype("float"),
missing,
nthread,
feature_names,
feature_types,
data=data.values.reshape(data.shape[0], 1).astype("float"),
missing=missing,
nthread=nthread,
feature_names=feature_names,
feature_types=feature_types,
)
@@ -768,6 +773,7 @@ def _transform_dt_df(
def _from_dt_df(
*,
data: DataType,
missing: Optional[FloatCompatible],
nthread: int,
@@ -778,7 +784,11 @@ def _from_dt_df(
if enable_categorical:
raise ValueError("categorical data in datatable is not supported yet.")
data, feature_names, feature_types = _transform_dt_df(
data, feature_names, feature_types, None, None
data=data,
feature_names=feature_names,
feature_types=feature_types,
meta=None,
meta_type=None,
)
ptrs = (ctypes.c_void_p * data.ncols)()
@@ -968,6 +978,7 @@ def _transform_cudf_df(
def _from_cudf_df(
*,
data: DataType,
missing: FloatCompatible,
nthread: int,
@@ -1095,6 +1106,7 @@ def _is_list(data: DataType) -> TypeGuard[list]:
def _from_list(
*,
data: Sequence,
missing: FloatCompatible,
n_threads: int,
@@ -1105,7 +1117,12 @@ def _from_list(
array = np.array(data)
_check_data_shape(data)
return _from_numpy_array(
array, missing, n_threads, feature_names, feature_types, data_split_mode
data=array,
missing=missing,
nthread=n_threads,
feature_names=feature_names,
feature_types=feature_types,
data_split_mode=data_split_mode,
)
@@ -1114,6 +1131,7 @@ def _is_tuple(data: DataType) -> TypeGuard[tuple]:
def _from_tuple(
*,
data: Sequence,
missing: FloatCompatible,
n_threads: int,
@@ -1122,7 +1140,12 @@ def _from_tuple(
data_split_mode: DataSplitMode = DataSplitMode.ROW,
) -> DispatchedDataBackendReturnType:
return _from_list(
data, missing, n_threads, feature_names, feature_types, data_split_mode
data=data,
missing=missing,
n_threads=n_threads,
feature_names=feature_names,
feature_types=feature_types,
data_split_mode=data_split_mode,
)
@@ -1153,6 +1176,7 @@ def _convert_unknown_data(data: DataType) -> DataType:
def dispatch_data_backend(
*,
data: DataType,
missing: FloatCompatible, # Or Optional[Float]
threads: int,
@@ -1166,34 +1190,59 @@ def dispatch_data_backend(
_check_data_shape(data)
if is_scipy_csr(data):
return _from_scipy_csr(
data, missing, threads, feature_names, feature_types, data_split_mode
data=data,
missing=missing,
nthread=threads,
feature_names=feature_names,
feature_types=feature_types,
data_split_mode=data_split_mode,
)
if is_scipy_csc(data):
return _from_scipy_csc(
data, missing, threads, feature_names, feature_types, data_split_mode
data=data,
missing=missing,
nthread=threads,
feature_names=feature_names,
feature_types=feature_types,
data_split_mode=data_split_mode,
)
if is_scipy_coo(data):
return _from_scipy_csr(
data.tocsr(),
missing,
threads,
feature_names,
feature_types,
data_split_mode,
data=data.tocsr(),
missing=missing,
nthread=threads,
feature_names=feature_names,
feature_types=feature_types,
data_split_mode=data_split_mode,
)
if _is_np_array_like(data):
return _from_numpy_array(
data, missing, threads, feature_names, feature_types, data_split_mode
data=data,
missing=missing,
nthread=threads,
feature_names=feature_names,
feature_types=feature_types,
data_split_mode=data_split_mode,
)
if _is_uri(data):
return _from_uri(data, missing, feature_names, feature_types, data_split_mode)
if _is_list(data):
return _from_list(
data, missing, threads, feature_names, feature_types, data_split_mode
data=data,
missing=missing,
n_threads=threads,
feature_names=feature_names,
feature_types=feature_types,
data_split_mode=data_split_mode,
)
if _is_tuple(data):
return _from_tuple(
data, missing, threads, feature_names, feature_types, data_split_mode
data=data,
missing=missing,
n_threads=threads,
feature_names=feature_names,
feature_types=feature_types,
data_split_mode=data_split_mode,
)
if _is_arrow(data):
data = _arrow_transform(data)
@@ -1203,17 +1252,22 @@ def dispatch_data_backend(
data = pd.DataFrame(data)
if _is_pandas_df(data):
return _from_pandas_df(
data,
enable_categorical,
missing,
threads,
feature_names,
feature_types,
data_split_mode,
data=data,
enable_categorical=enable_categorical,
missing=missing,
nthread=threads,
feature_names=feature_names,
feature_types=feature_types,
data_split_mode=data_split_mode,
)
if _is_cudf_df(data) or _is_cudf_ser(data):
return _from_cudf_df(
data, missing, threads, feature_names, feature_types, enable_categorical
data=data,
missing=missing,
nthread=threads,
feature_names=feature_names,
feature_types=feature_types,
enable_categorical=enable_categorical,
)
if _is_cupy_alike(data):
return _from_cupy_array(data, missing, threads, feature_names, feature_types)
@@ -1226,24 +1280,49 @@ def dispatch_data_backend(
if _is_dt_df(data):
_warn_unused_missing(data, missing)
return _from_dt_df(
data, missing, threads, feature_names, feature_types, enable_categorical
data=data,
missing=missing,
nthread=threads,
feature_names=feature_names,
feature_types=feature_types,
enable_categorical=enable_categorical,
)
if _is_modin_df(data):
return _from_pandas_df(
data, enable_categorical, missing, threads, feature_names, feature_types
data=data,
enable_categorical=enable_categorical,
missing=missing,
nthread=threads,
feature_names=feature_names,
feature_types=feature_types,
)
if _is_modin_series(data):
return _from_pandas_series(
data, missing, threads, enable_categorical, feature_names, feature_types
data=data,
missing=missing,
nthread=threads,
enable_categorical=enable_categorical,
feature_names=feature_names,
feature_types=feature_types,
)
if _has_array_protocol(data):
array = np.asarray(data)
return _from_numpy_array(array, missing, threads, feature_names, feature_types)
return _from_numpy_array(
data=array,
missing=missing,
nthread=threads,
feature_names=feature_names,
feature_types=feature_types,
)
converted = _convert_unknown_data(data)
if converted is not None:
return _from_scipy_csr(
converted, missing, threads, feature_names, feature_types
data=converted,
missing=missing,
nthread=threads,
feature_names=feature_names,
feature_types=feature_types,
)
raise TypeError("Not supported type for data." + str(type(data)))
@@ -1313,7 +1392,9 @@ def _meta_from_cupy_array(data: DataType, field: str, handle: ctypes.c_void_p) -
def _meta_from_dt(
data: DataType, field: str, dtype: Optional[NumpyDType], handle: ctypes.c_void_p
) -> None:
data, _, _ = _transform_dt_df(data, None, None, field, dtype)
data, _, _ = _transform_dt_df(
data=data, feature_names=None, feature_types=None, meta=field, meta_type=dtype
)
_meta_from_numpy(data, field, dtype, handle)

View File

@@ -4,7 +4,7 @@ import ctypes
from threading import Thread
from typing import Any, Dict, Optional
from .core import _LIB, _check_call, make_jcargs
from .core import _LIB, _check_call, _deprecate_positional_args, make_jcargs
from .tracker import RabitTracker
@@ -34,10 +34,12 @@ class FederatedTracker(RabitTracker):
"""
@_deprecate_positional_args
def __init__( # pylint: disable=R0913, W0231
self,
n_workers: int,
port: int,
*,
secure: bool,
server_key_path: Optional[str] = None,
server_cert_path: Optional[str] = None,
@@ -59,9 +61,11 @@ class FederatedTracker(RabitTracker):
self.handle = handle
@_deprecate_positional_args
def run_federated_server( # pylint: disable=too-many-arguments
n_workers: int,
port: int,
*,
server_key_path: Optional[str] = None,
server_cert_path: Optional[str] = None,
client_cert_path: Optional[str] = None,

View File

@@ -8,15 +8,17 @@ from typing import Any, Optional, Union
import numpy as np
from ._typing import PathLike
from .core import Booster
from .core import Booster, _deprecate_positional_args
from .sklearn import XGBModel
Axes = Any # real type is matplotlib.axes.Axes
GraphvizSource = Any # real type is graphviz.Source
@_deprecate_positional_args
def plot_importance(
booster: Union[XGBModel, Booster, dict],
*,
ax: Optional[Axes] = None,
height: float = 0.2,
xlim: Optional[tuple] = None,
@@ -146,8 +148,10 @@ def plot_importance(
return ax
@_deprecate_positional_args
def to_graphviz(
booster: Union[Booster, XGBModel],
*,
fmap: PathLike = "",
num_trees: int = 0,
rankdir: Optional[str] = None,

View File

@@ -582,6 +582,7 @@ Parameters
def _wrap_evaluation_matrices(
*,
missing: float,
X: Any,
y: Any,
@@ -696,8 +697,10 @@ DEFAULT_N_ESTIMATORS = 100
)
class XGBModel(XGBModelBase):
# pylint: disable=too-many-arguments, too-many-instance-attributes, missing-docstring
@_deprecate_positional_args
def __init__(
self,
*,
max_depth: Optional[int] = None,
max_leaves: Optional[int] = None,
max_bin: Optional[int] = None,
@@ -1174,9 +1177,11 @@ class XGBModel(XGBModelBase):
iteration_range = (0, 0)
return iteration_range
@_deprecate_positional_args
def predict(
self,
X: ArrayLike,
*,
output_margin: bool = False,
validate_features: bool = True,
base_margin: Optional[ArrayLike] = None,
@@ -1587,9 +1592,11 @@ class XGBClassifier(XGBModel, XGBClassifierBase):
"Fit gradient boosting model", "Fit gradient boosting classifier", 1
)
@_deprecate_positional_args
def predict(
self,
X: ArrayLike,
*,
output_margin: bool = False,
validate_features: bool = True,
base_margin: Optional[ArrayLike] = None,
@@ -2070,9 +2077,11 @@ class XGBRanker(XGBModel, XGBRankerMixIn):
self._set_evaluation_result(evals_result)
return self
@_deprecate_positional_args
def predict(
self,
X: ArrayLike,
*,
output_margin: bool = False,
validate_features: bool = True,
base_margin: Optional[ArrayLike] = None,
@@ -2081,9 +2090,9 @@ class XGBRanker(XGBModel, XGBRankerMixIn):
X, _ = _get_qid(X, None)
return super().predict(
X,
output_margin,
validate_features,
base_margin,
output_margin=output_margin,
validate_features=validate_features,
base_margin=base_margin,
iteration_range=iteration_range,
)

View File

@@ -1072,11 +1072,11 @@ class _SparkXGBEstimator(Estimator, _SparkXGBParams, MLReadable, MLWritable):
with CommunicatorContext(context, **_rabit_args):
with xgboost.config_context(verbosity=verbosity):
dtrain, dvalid = create_dmatrix_from_partitions(
pandas_df_iter,
feature_prop.features_cols_names,
dev_ordinal,
use_qdm,
dmatrix_kwargs,
iterator=pandas_df_iter,
feature_cols=feature_prop.features_cols_names,
dev_ordinal=dev_ordinal,
use_qdm=use_qdm,
kwargs=dmatrix_kwargs,
enable_sparse_data_optim=feature_prop.enable_sparse_data_optim,
has_validation_col=feature_prop.has_validation_col,
)

View File

@@ -171,6 +171,7 @@ def make_qdm(
def create_dmatrix_from_partitions( # pylint: disable=too-many-arguments
*,
iterator: Iterator[pd.DataFrame],
feature_cols: Optional[Sequence[str]],
dev_ordinal: Optional[int],

View File

@@ -224,6 +224,7 @@ class IteratorForTest(xgb.core.DataIter):
X: Sequence,
y: Sequence,
w: Optional[Sequence],
*,
cache: Optional[str],
on_host: bool = False,
) -> None:
@@ -379,6 +380,7 @@ def make_categorical(
n_samples: int,
n_features: int,
n_categories: int,
*,
onehot: bool,
sparsity: float = 0.0,
cat_ratio: float = 1.0,
@@ -487,7 +489,9 @@ def _cat_sampled_from() -> strategies.SearchStrategy:
sparsity = args[3]
return TestDataset(
f"{n_samples}x{n_features}-{n_cats}-{sparsity}",
lambda: make_categorical(n_samples, n_features, n_cats, False, sparsity),
lambda: make_categorical(
n_samples, n_features, n_cats, onehot=False, sparsity=sparsity
),
"reg:squarederror",
"rmse",
)

View File

@@ -22,7 +22,7 @@ def run_mixed_sparsity(device: str) -> None:
X = [cp.array(batch) for batch in X]
it = tm.IteratorForTest(X, y, None, None, on_host=False)
it = tm.IteratorForTest(X, y, None, cache=None, on_host=False)
Xy_0 = xgboost.QuantileDMatrix(it)
X_1, y_1 = tm.make_sparse_regression(256, 16, 0.1, True)

View File

@@ -52,6 +52,7 @@ def validate_data_initialization(
# pylint: disable=too-many-arguments,too-many-locals
def get_feature_weights(
*,
X: ArrayLike,
y: ArrayLike,
fw: np.ndarray,

View File

@@ -291,7 +291,9 @@ def check_get_quantile_cut_device(tree_method: str, use_cupy: bool) -> None:
# categorical
n_categories = 32
X, y = tm.make_categorical(n_samples, n_features, n_categories, False, sparsity=0.8)
X, y = tm.make_categorical(
n_samples, n_features, n_categories, onehot=False, sparsity=0.8
)
if use_cupy:
import cudf # pylint: disable=import-error
import cupy as cp # pylint: disable=import-error
@@ -310,7 +312,7 @@ def check_get_quantile_cut_device(tree_method: str, use_cupy: bool) -> None:
# mixed
X, y = tm.make_categorical(
n_samples, n_features, n_categories, False, sparsity=0.8, cat_ratio=0.5
n_samples, n_features, n_categories, onehot=False, sparsity=0.8, cat_ratio=0.5
)
n_cat_features = len([0 for dtype in X.dtypes if is_pd_cat_dtype(dtype)])
n_num_features = n_features - n_cat_features
@@ -340,12 +342,12 @@ USE_PART = 1
def check_categorical_ohe( # pylint: disable=too-many-arguments
rows: int, cols: int, rounds: int, cats: int, device: str, tree_method: str
*, rows: int, cols: int, rounds: int, cats: int, device: str, tree_method: str
) -> None:
"Test for one-hot encoding with categorical data."
onehot, label = tm.make_categorical(rows, cols, cats, True)
cat, _ = tm.make_categorical(rows, cols, cats, False)
onehot, label = tm.make_categorical(rows, cols, cats, onehot=True)
cat, _ = tm.make_categorical(rows, cols, cats, onehot=False)
by_etl_results: Dict[str, Dict[str, List[float]]] = {}
by_builtin_results: Dict[str, Dict[str, List[float]]] = {}

View File

@@ -6,7 +6,7 @@ import socket
from enum import IntEnum, unique
from typing import Dict, Optional, Union
from .core import _LIB, _check_call, make_jcargs
from .core import _LIB, _check_call, _deprecate_positional_args, make_jcargs
def get_family(addr: str) -> int:
@@ -48,11 +48,13 @@ class RabitTracker:
HOST = 0
TASK = 1
@_deprecate_positional_args
def __init__( # pylint: disable=too-many-arguments
self,
n_workers: int,
host_ip: Optional[str],
port: int = 0,
*,
sortby: str = "host",
timeout: int = 0,
) -> None:

View File

@@ -288,6 +288,7 @@ def groups_to_rows(groups: np.ndarray, boundaries: np.ndarray) -> np.ndarray:
def mkgroupfold(
*,
dall: DMatrix,
nfold: int,
param: BoosterParam,
@@ -341,6 +342,7 @@ def mkgroupfold(
def mknfold(
*,
dall: DMatrix,
nfold: int,
param: BoosterParam,
@@ -361,7 +363,12 @@ def mknfold(
# Do standard k-fold cross validation. Automatically determine the folds.
if len(dall.get_uint_info("group_ptr")) > 1:
return mkgroupfold(
dall, nfold, param, evals=evals, fpreproc=fpreproc, shuffle=shuffle
dall=dall,
nfold=nfold,
param=param,
evals=evals,
fpreproc=fpreproc,
shuffle=shuffle,
)
if shuffle is True:
@@ -407,10 +414,12 @@ def mknfold(
return ret
@_deprecate_positional_args
def cv(
params: BoosterParam,
dtrain: DMatrix,
num_boost_round: int = 10,
*,
nfold: int = 3,
stratified: bool = False,
folds: XGBStratifiedKFold = None,
@@ -541,7 +550,15 @@ def cv(
results: Dict[str, List[float]] = {}
cvfolds = mknfold(
dtrain, nfold, params, seed, metrics, fpreproc, stratified, folds, shuffle
dall=dtrain,
nfold=nfold,
param=params,
seed=seed,
evals=metrics,
fpreproc=fpreproc,
stratified=stratified,
folds=folds,
shuffle=shuffle,
)
metric_fn = _configure_custom_metric(feval, custom_metric)