[dask] Rework base margin test. (#6627)
This commit is contained in:
parent
7bc56fa0ed
commit
a275f40267
@ -149,68 +149,30 @@ def test_dask_predict_shape_infer() -> None:
|
|||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize("tree_method", ["hist", "approx"])
|
@pytest.mark.parametrize("tree_method", ["hist", "approx"])
|
||||||
def test_boost_from_prediction(tree_method: str) -> None:
|
def test_boost_from_prediction(tree_method: str, client: "Client") -> None:
|
||||||
if tree_method == 'approx':
|
|
||||||
pytest.xfail(reason='test_boost_from_prediction[approx] is flaky')
|
|
||||||
|
|
||||||
from sklearn.datasets import load_breast_cancer
|
from sklearn.datasets import load_breast_cancer
|
||||||
X, y = load_breast_cancer(return_X_y=True)
|
X_, y_ = load_breast_cancer(return_X_y=True)
|
||||||
|
|
||||||
X_ = dd.from_array(X, chunksize=100)
|
X, y = dd.from_array(X_, chunksize=100), dd.from_array(y_, chunksize=100)
|
||||||
y_ = dd.from_array(y, chunksize=100)
|
|
||||||
|
|
||||||
with LocalCluster(n_workers=4) as cluster:
|
|
||||||
with Client(cluster) as _:
|
|
||||||
model_0 = xgb.dask.DaskXGBClassifier(
|
model_0 = xgb.dask.DaskXGBClassifier(
|
||||||
learning_rate=0.3,
|
learning_rate=0.3, random_state=0, n_estimators=4,
|
||||||
random_state=123,
|
tree_method=tree_method)
|
||||||
n_estimators=4,
|
model_0.fit(X=X, y=y)
|
||||||
tree_method=tree_method,
|
margin = model_0.predict(X, output_margin=True)
|
||||||
)
|
|
||||||
model_0.fit(X=X_, y=y_)
|
|
||||||
margin = model_0.predict(X_, output_margin=True)
|
|
||||||
|
|
||||||
model_1 = xgb.dask.DaskXGBClassifier(
|
model_1 = xgb.dask.DaskXGBClassifier(
|
||||||
learning_rate=0.3,
|
learning_rate=0.3, random_state=0, n_estimators=4,
|
||||||
random_state=123,
|
tree_method=tree_method)
|
||||||
n_estimators=4,
|
model_1.fit(X=X, y=y, base_margin=margin)
|
||||||
tree_method=tree_method,
|
predictions_1 = model_1.predict(X, base_margin=margin)
|
||||||
)
|
|
||||||
model_1.fit(X=X_, y=y_, base_margin=margin)
|
|
||||||
predictions_1 = model_1.predict(X_, base_margin=margin)
|
|
||||||
proba_1 = model_1.predict_proba(X_, base_margin=margin)
|
|
||||||
|
|
||||||
cls_2 = xgb.dask.DaskXGBClassifier(
|
cls_2 = xgb.dask.DaskXGBClassifier(
|
||||||
learning_rate=0.3,
|
learning_rate=0.3, random_state=0, n_estimators=8,
|
||||||
random_state=123,
|
tree_method=tree_method)
|
||||||
n_estimators=8,
|
cls_2.fit(X=X, y=y)
|
||||||
tree_method=tree_method,
|
predictions_2 = cls_2.predict(X)
|
||||||
)
|
|
||||||
cls_2.fit(X=X_, y=y_)
|
|
||||||
predictions_2 = cls_2.predict(X_)
|
|
||||||
proba_2 = cls_2.predict_proba(X_)
|
|
||||||
|
|
||||||
cls_3 = xgb.dask.DaskXGBClassifier(
|
assert np.all(predictions_1.compute() == predictions_2.compute())
|
||||||
learning_rate=0.3,
|
|
||||||
random_state=123,
|
|
||||||
n_estimators=8,
|
|
||||||
tree_method=tree_method,
|
|
||||||
)
|
|
||||||
cls_3.fit(X=X_, y=y_)
|
|
||||||
proba_3 = cls_3.predict_proba(X_)
|
|
||||||
|
|
||||||
# compute variance of probability percentages between two of the
|
|
||||||
# same model, use this to check to make sure approx is functioning
|
|
||||||
# within normal parameters
|
|
||||||
expected_variance = np.max(np.abs(proba_3 - proba_2)).compute()
|
|
||||||
|
|
||||||
if expected_variance > 0:
|
|
||||||
margin_variance = np.max(np.abs(proba_1 - proba_2)).compute()
|
|
||||||
# Ensure the margin variance is less than the expected variance + 10%
|
|
||||||
assert np.all(margin_variance <= expected_variance + .1)
|
|
||||||
else:
|
|
||||||
np.testing.assert_equal(predictions_1.compute(), predictions_2.compute())
|
|
||||||
np.testing.assert_almost_equal(proba_1.compute(), proba_2.compute())
|
|
||||||
|
|
||||||
|
|
||||||
def test_dask_missing_value_reg() -> None:
|
def test_dask_missing_value_reg() -> None:
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user