[CI] Fix PyLint errors. (#10837)

This commit is contained in:
Jiaming Yuan
2024-09-24 14:09:32 +08:00
committed by GitHub
parent 982ee34658
commit 68a8865bc5
26 changed files with 349 additions and 118 deletions

View File

@@ -104,7 +104,7 @@ class TestBoosterIO:
self.run_model_json_io(parameters, ext)
def test_categorical_model_io(self) -> None:
X, y = tm.make_categorical(256, 16, 71, False)
X, y = tm.make_categorical(256, 16, 71, onehot=False)
Xy = xgb.DMatrix(X, y, enable_categorical=True)
booster = xgb.train({"tree_method": "approx"}, Xy, num_boost_round=16)
predt_0 = booster.predict(Xy)

View File

@@ -49,7 +49,7 @@ class TestTreesToDataFrame:
assert np.allclose(cover_from_dump, cover_from_df)
def run_tree_to_df_categorical(self, tree_method: str) -> None:
X, y = tm.make_categorical(100, 10, 31, False)
X, y = tm.make_categorical(100, 10, 31, onehot=False)
Xy = xgb.DMatrix(X, y, enable_categorical=True)
booster = xgb.train({"tree_method": tree_method}, Xy, num_boost_round=10)
df = booster.trees_to_dataframe()
@@ -61,7 +61,7 @@ class TestTreesToDataFrame:
self.run_tree_to_df_categorical("approx")
def run_split_value_histograms(self, tree_method) -> None:
X, y = tm.make_categorical(1000, 10, 13, False)
X, y = tm.make_categorical(1000, 10, 13, onehot=False)
reg = xgb.XGBRegressor(tree_method=tree_method, enable_categorical=True)
reg.fit(X, y)

View File

@@ -97,14 +97,15 @@ class TestQuantileDMatrix:
if sparsity == 0.0:
it = IteratorForTest(
*make_batches(n_samples_per_batch, n_features, n_batches, False), None
*make_batches(n_samples_per_batch, n_features, n_batches, False),
cache=None,
)
else:
it = IteratorForTest(
*make_batches_sparse(
n_samples_per_batch, n_features, n_batches, sparsity
),
None,
cache=None,
)
Xy = xgb.QuantileDMatrix(it)
assert Xy.num_row() == n_samples_per_batch * n_batches
@@ -134,14 +135,15 @@ class TestQuantileDMatrix:
n_batches = 7
if sparsity == 0.0:
it = IteratorForTest(
*make_batches(n_samples_per_batch, n_features, n_batches, False), None
*make_batches(n_samples_per_batch, n_features, n_batches, False),
cache=None,
)
else:
it = IteratorForTest(
*make_batches_sparse(
n_samples_per_batch, n_features, n_batches, sparsity
),
None,
cache=None,
)
parameters = {"tree_method": "hist", "max_bin": 256}

View File

@@ -81,23 +81,26 @@ class TestTreeMethod:
@pytest.mark.skipif(**tm.no_sklearn())
def test_pruner(self):
import sklearn
params = {'tree_method': 'exact'}
params = {"tree_method": "exact"}
cancer = sklearn.datasets.load_breast_cancer()
X = cancer['data']
X = cancer["data"]
y = cancer["target"]
dtrain = xgb.DMatrix(X, y)
booster = xgb.train(params, dtrain=dtrain, num_boost_round=10)
grown = str(booster.get_dump())
params = {'updater': 'prune', 'process_type': 'update', 'gamma': '0.2'}
booster = xgb.train(params, dtrain=dtrain, num_boost_round=10,
xgb_model=booster)
params = {"updater": "prune", "process_type": "update", "gamma": "0.2"}
booster = xgb.train(
params, dtrain=dtrain, num_boost_round=10, xgb_model=booster
)
after_prune = str(booster.get_dump())
assert grown != after_prune
booster = xgb.train(params, dtrain=dtrain, num_boost_round=10,
xgb_model=booster)
booster = xgb.train(
params, dtrain=dtrain, num_boost_round=10, xgb_model=booster
)
second_prune = str(booster.get_dump())
# Second prune should not change the tree
assert after_prune == second_prune
@@ -107,11 +110,12 @@ class TestTreeMethod:
hist_parameter_strategy,
hist_cache_strategy,
strategies.integers(1, 20),
tm.make_dataset_strategy()
tm.make_dataset_strategy(),
)
@settings(deadline=None, print_blob=True)
def test_hist(
self, param: Dict[str, Any],
self,
param: Dict[str, Any],
hist_param: Dict[str, Any],
cache_param: Dict[str, Any],
num_rounds: int,
@@ -128,11 +132,13 @@ class TestTreeMethod:
def test_hist_categorical(self):
# hist must be same as exact on all-categorical data
ag_dtrain, ag_dtest = tm.load_agaricus(__file__)
ag_param = {'max_depth': 2,
'tree_method': 'hist',
'eta': 1,
'objective': 'binary:logistic',
'eval_metric': 'auc'}
ag_param = {
"max_depth": 2,
"tree_method": "hist",
"eta": 1,
"objective": "binary:logistic",
"eval_metric": "auc",
}
hist_res = {}
exact_res = {}
@@ -141,7 +147,7 @@ class TestTreeMethod:
ag_dtrain,
10,
evals=[(ag_dtrain, "train"), (ag_dtest, "test")],
evals_result=hist_res
evals_result=hist_res,
)
ag_param["tree_method"] = "exact"
xgb.train(
@@ -149,10 +155,10 @@ class TestTreeMethod:
ag_dtrain,
10,
evals=[(ag_dtrain, "train"), (ag_dtest, "test")],
evals_result=exact_res
evals_result=exact_res,
)
assert hist_res['train']['auc'] == exact_res['train']['auc']
assert hist_res['test']['auc'] == exact_res['test']['auc']
assert hist_res["train"]["auc"] == exact_res["train"]["auc"]
assert hist_res["test"]["auc"] == exact_res["test"]["auc"]
@pytest.mark.skipif(**tm.no_sklearn())
def test_hist_degenerate_case(self):
@@ -160,11 +166,17 @@ class TestTreeMethod:
# quantile points for a particular feature (the second feature in
# this example). Source: https://github.com/dmlc/xgboost/issues/2943
nan = np.nan
param = {'missing': nan, 'tree_method': 'hist'}
param = {"missing": nan, "tree_method": "hist"}
model = xgb.XGBRegressor(**param)
X = np.array([[6.18827160e+05, 1.73000000e+02], [6.37345679e+05, nan],
[6.38888889e+05, nan], [6.28086420e+05, nan]])
y = [1000000., 0., 0., 500000.]
X = np.array(
[
[6.18827160e05, 1.73000000e02],
[6.37345679e05, nan],
[6.38888889e05, nan],
[6.28086420e05, nan],
]
)
y = [1000000.0, 0.0, 0.0, 500000.0]
w = [0, 0, 1, 0]
model.fit(X, y, sample_weight=w)
@@ -174,12 +186,12 @@ class TestTreeMethod:
param = {"tree_method": "hist", "max_bin": 64}
hist_result = train_result(param, dataset.get_dmat(), 16)
note(str(hist_result))
assert tm.non_increasing(hist_result['train'][dataset.metric])
assert tm.non_increasing(hist_result["train"][dataset.metric])
param = {"tree_method": "approx", "max_bin": 64}
approx_result = train_result(param, dataset.get_dmat(), 16)
note(str(approx_result))
assert tm.non_increasing(approx_result['train'][dataset.metric])
assert tm.non_increasing(approx_result["train"][dataset.metric])
np.testing.assert_allclose(
hist_result["train"]["rmse"], approx_result["train"]["rmse"]
@@ -248,15 +260,33 @@ class TestTreeMethod:
def test_max_cat(self, tree_method) -> None:
self.run_max_cat(tree_method)
@given(strategies.integers(10, 400), strategies.integers(3, 8),
strategies.integers(1, 2), strategies.integers(4, 7))
@given(
strategies.integers(10, 400),
strategies.integers(3, 8),
strategies.integers(1, 2),
strategies.integers(4, 7),
)
@settings(deadline=None, print_blob=True)
@pytest.mark.skipif(**tm.no_pandas())
def test_categorical_ohe(
self, rows: int, cols: int, rounds: int, cats: int
) -> None:
check_categorical_ohe(rows, cols, rounds, cats, "cpu", "approx")
check_categorical_ohe(rows, cols, rounds, cats, "cpu", "hist")
check_categorical_ohe(
rows=rows,
cols=cols,
rounds=rounds,
cats=cats,
device="cpu",
tree_method="approx",
)
check_categorical_ohe(
rows=rows,
cols=cols,
rounds=rounds,
cats=cats,
device="cpu",
tree_method="hist",
)
@given(
tm.categorical_dataset_strategy,
@@ -307,7 +337,7 @@ class TestTreeMethod:
@given(
strategies.integers(10, 400),
strategies.integers(3, 8),
strategies.integers(4, 7)
strategies.integers(4, 7),
)
@settings(deadline=None, print_blob=True)
@pytest.mark.skipif(**tm.no_pandas())
@@ -395,9 +425,8 @@ class TestTreeMethod:
@pytest.mark.skipif(**tm.no_sklearn())
@pytest.mark.parametrize(
"tree_method,weighted", [
("approx", False), ("hist", False), ("approx", True), ("hist", True)
]
"tree_method,weighted",
[("approx", False), ("hist", False), ("approx", True), ("hist", True)],
)
def test_adaptive(self, tree_method, weighted) -> None:
self.run_adaptive(tree_method, weighted)

View File

@@ -1161,14 +1161,24 @@ def test_feature_weights(tree_method):
parser_path = os.path.join(tm.demo_dir(__file__), "json-model", "json_parser.py")
poly_increasing = get_feature_weights(
X, y, fw, parser_path, tree_method, xgb.XGBRegressor
X=X,
y=y,
fw=fw,
parser_path=parser_path,
tree_method=tree_method,
model=xgb.XGBRegressor,
)
fw = np.ones(shape=(kCols,))
for i in range(kCols):
fw[i] *= float(kCols - i)
poly_decreasing = get_feature_weights(
X, y, fw, parser_path, tree_method, xgb.XGBRegressor
X=X,
y=y,
fw=fw,
parser_path=parser_path,
tree_method=tree_method,
model=xgb.XGBRegressor,
)
# Approximate test; this is dependent on the implementation of random