[dask] Add shap tests. (#6575)
This commit is contained in:
parent
7c9dcbedbc
commit
96d3d32265
7
.github/workflows/main.yml
vendored
7
.github/workflows/main.yml
vendored
@ -174,12 +174,7 @@ jobs:
|
|||||||
python -m pip install wheel setuptools mypy dask[complete] distributed
|
python -m pip install wheel setuptools mypy dask[complete] distributed
|
||||||
- name: Run mypy
|
- name: Run mypy
|
||||||
run: |
|
run: |
|
||||||
cd python-package
|
make mypy
|
||||||
# dask is required to pass, others are not
|
|
||||||
mypy ./xgboost/dask.py ../tests/python/test_with_dask.py --follow-imports=silent
|
|
||||||
mypy ../tests/python-gpu/test_gpu_with_dask.py --follow-imports=silent
|
|
||||||
# If any of the above failed, contributor won't see the next error.
|
|
||||||
mypy . || true
|
|
||||||
|
|
||||||
doxygen:
|
doxygen:
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
|
|||||||
9
Makefile
9
Makefile
@ -86,6 +86,15 @@ cover: check
|
|||||||
)
|
)
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
|
||||||
|
# The dask type checks are required to pass; the whole-package check is
# advisory only.
# Commands are chained with `&&` so that a failing `cd` or a failing dask
# check fails the target instead of being masked by the exit status of the
# final, always-successful command.  As a consequence, if any of the dask
# checks fails, the contributor won't see the whole-package report.
mypy:
	cd python-package && \
	mypy ./xgboost/dask.py ../tests/python/test_with_dask.py --follow-imports=silent && \
	mypy ../tests/python-gpu/test_gpu_with_dask.py --follow-imports=silent && \
	{ mypy . || true ; }
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
$(RM) -rf build lib bin *~ */*~ */*/*~ */*/*/*~ */*.o */*/*.o */*/*/*.o #xgboost
|
$(RM) -rf build lib bin *~ */*~ */*/*~ */*/*/*~ */*.o */*/*.o */*/*/*.o #xgboost
|
||||||
$(RM) -rf build_tests *.gcov tests/cpp/xgboost_test
|
$(RM) -rf build_tests *.gcov tests/cpp/xgboost_test
|
||||||
|
|||||||
@ -6,7 +6,7 @@ import xgboost as xgb
|
|||||||
import sys
|
import sys
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import json
|
import json
|
||||||
from typing import List, Tuple, Union, Dict, Optional, Callable, Type
|
from typing import List, Tuple, Dict, Optional, Type, Any
|
||||||
import asyncio
|
import asyncio
|
||||||
import tempfile
|
import tempfile
|
||||||
from sklearn.datasets import make_classification
|
from sklearn.datasets import make_classification
|
||||||
@ -953,6 +953,73 @@ class TestWithDask:
|
|||||||
# Subtract the on disk resource from each worker
|
# Subtract the on disk resource from each worker
|
||||||
assert cnt - n_workers == n_partitions
|
assert cnt - n_workers == n_partitions
|
||||||
|
|
||||||
|
def run_shap(self, X: Any, y: Any, params: Dict[str, Any], client: "Client") -> None:
    """Assert that per-feature SHAP contributions add up to the raw margin.

    ``X`` and ``y`` are wrapped as dask arrays, a booster is trained on them
    for 10 rounds with ``params``, and predictions are made on the same data.
    The contributions (``pred_contribs=True``) summed over their last axis
    must match the margin (``output_margin=True``) within rtol/atol of 1e-5.
    """
    dX = da.from_array(X)
    dy = da.from_array(y)

    dtrain = xgb.dask.DaskDMatrix(client, dX, dy)
    output = xgb.dask.train(client, params, dtrain, num_boost_round=10)
    booster = output['booster']

    dtest = xgb.dask.DaskDMatrix(client, dX, dy)

    contribs = xgb.dask.predict(
        client, booster, dtest, pred_contribs=True
    ).compute()
    margin = xgb.dask.predict(
        client, booster, dtest, output_margin=True
    ).compute()

    # The last axis holds the per-feature contributions.
    summed = np.sum(contribs, axis=-1)
    assert np.allclose(summed, margin, rtol=1e-5, atol=1e-5)
|
||||||
|
|
||||||
|
def run_shap_cls_sklearn(self, X: Any, y: Any, client: "Client") -> None:
    """Run the SHAP-sums-to-margin check on a booster fitted via the sklearn API.

    A ``DaskXGBClassifier`` is fitted on dask-array versions of ``X`` and
    ``y``; its underlying booster is then used to predict contributions and
    margins on the same data, which must agree within rtol/atol of 1e-5
    once the contributions are summed over their last axis.
    """
    dX = da.from_array(X)
    dy = da.from_array(y)

    classifier = xgb.dask.DaskXGBClassifier()
    classifier.client = client
    classifier.fit(dX, dy)
    booster = classifier.get_booster()

    dtest = xgb.dask.DaskDMatrix(client, dX, dy)

    contribs = xgb.dask.predict(
        client, booster, dtest, pred_contribs=True
    ).compute()
    margin = xgb.dask.predict(
        client, booster, dtest, output_margin=True
    ).compute()

    # The last axis holds the per-feature contributions.
    summed = np.sum(contribs, axis=-1)
    assert np.allclose(summed, margin, rtol=1e-5, atol=1e-5)
|
||||||
|
|
||||||
|
@pytest.mark.skipif(**tm.no_sklearn())
def test_shap(self, client: "Client") -> None:
    """Check SHAP contributions for regression and multi-class objectives.

    Runs ``run_shap`` on the Boston data with ``reg:squarederror`` and on the
    digits data with both ``multi:softmax`` and ``multi:softprob``, then
    repeats the check through the sklearn classifier wrapper.

    Skipped when scikit-learn is unavailable, matching the other
    sklearn-dependent tests in this class.
    """
    # NOTE(review): load_boston was deprecated in scikit-learn 1.0 and
    # removed in 1.2; this import fails on newer releases.
    from sklearn.datasets import load_boston, load_digits

    X, y = load_boston(return_X_y=True)
    params = {'objective': 'reg:squarederror'}
    self.run_shap(X, y, params, client)

    X, y = load_digits(return_X_y=True)
    params = {'objective': 'multi:softmax', 'num_class': 10}
    self.run_shap(X, y, params, client)
    params = {'objective': 'multi:softprob', 'num_class': 10}
    self.run_shap(X, y, params, client)

    self.run_shap_cls_sklearn(X, y, client)
|
||||||
|
|
||||||
|
def run_shap_interactions(
    self,
    X: Any,
    y: Any,
    params: Dict[str, Any],
    client: "Client"
) -> None:
    """Assert that SHAP interaction values add up to the raw margin.

    ``X`` and ``y`` are wrapped as dask arrays, a booster is trained on them
    for 10 rounds with ``params``, and predictions are made on the same data.
    The interaction values (``pred_interactions=True``) summed over their
    last two axes must match the margin (``output_margin=True``) within
    rtol/atol of 1e-5.
    """
    dX = da.from_array(X)
    dy = da.from_array(y)

    dtrain = xgb.dask.DaskDMatrix(client, dX, dy)
    output = xgb.dask.train(client, params, dtrain, num_boost_round=10)
    booster = output['booster']

    dtest = xgb.dask.DaskDMatrix(client, dX, dy)

    interactions = xgb.dask.predict(
        client, booster, dtest, pred_interactions=True
    ).compute()
    margin = xgb.dask.predict(
        client, booster, dtest, output_margin=True
    ).compute()

    # Collapse the two trailing axes of the interaction values.
    last_axis = interactions.ndim - 1
    summed = np.sum(interactions, axis=(last_axis, last_axis - 1))
    assert np.allclose(summed, margin, rtol=1e-5, atol=1e-5)
|
||||||
|
|
||||||
|
@pytest.mark.skipif(**tm.no_sklearn())
def test_shap_interactions(self, client: "Client") -> None:
    """Check SHAP interaction values on the Boston data with ``reg:squarederror``.

    Skipped when scikit-learn is unavailable, matching the other
    sklearn-dependent tests in this class.
    """
    # NOTE(review): load_boston was deprecated in scikit-learn 1.0 and
    # removed in 1.2; this import fails on newer releases.
    from sklearn.datasets import load_boston

    X, y = load_boston(return_X_y=True)
    params = {'objective': 'reg:squarederror'}
    self.run_shap_interactions(X, y, params, client)
|
||||||
|
|
||||||
@pytest.mark.skipif(**tm.no_sklearn())
|
@pytest.mark.skipif(**tm.no_sklearn())
|
||||||
def test_sklearn_io(self, client: 'Client') -> None:
|
def test_sklearn_io(self, client: 'Client') -> None:
|
||||||
from sklearn.datasets import load_digits
|
from sklearn.datasets import load_digits
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user