[dask] Add shap tests. (#6575)

This commit is contained in:
Jiaming Yuan 2021-01-08 14:59:27 +08:00 committed by GitHub
parent 7c9dcbedbc
commit 96d3d32265
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 78 additions and 7 deletions

View File

@ -174,12 +174,7 @@ jobs:
python -m pip install wheel setuptools mypy dask[complete] distributed python -m pip install wheel setuptools mypy dask[complete] distributed
- name: Run mypy - name: Run mypy
run: | run: |
cd python-package make mypy
# dask is required to pass, others are not
mypy ./xgboost/dask.py ../tests/python/test_with_dask.py --follow-imports=silent
mypy ../tests/python-gpu/test_gpu_with_dask.py --follow-imports=silent
# If any of the above failed, contributor won't see the next error.
mypy . || true
doxygen: doxygen:
runs-on: ubuntu-latest runs-on: ubuntu-latest

View File

@ -86,6 +86,15 @@ cover: check
) )
endif endif
# Type-check the Python package with mypy.
# The dask module and its tests are required to pass: the commands are chained
# with `&&`, so the recipe stops and fails at the first failing required check
# (a contributor will not see errors from the later checks until it is fixed).
# The package-wide `mypy .` run is advisory only; it is grouped with `|| true`
# so that its result alone can never fail the target.
mypy:
	cd python-package && \
	mypy ./xgboost/dask.py ../tests/python/test_with_dask.py --follow-imports=silent && \
	mypy ../tests/python-gpu/test_gpu_with_dask.py --follow-imports=silent && \
	( mypy . || true )
clean: clean:
$(RM) -rf build lib bin *~ */*~ */*/*~ */*/*/*~ */*.o */*/*.o */*/*/*.o #xgboost $(RM) -rf build lib bin *~ */*~ */*/*~ */*/*/*~ */*.o */*/*.o */*/*/*.o #xgboost
$(RM) -rf build_tests *.gcov tests/cpp/xgboost_test $(RM) -rf build_tests *.gcov tests/cpp/xgboost_test

View File

@ -6,7 +6,7 @@ import xgboost as xgb
import sys import sys
import numpy as np import numpy as np
import json import json
from typing import List, Tuple, Union, Dict, Optional, Callable, Type from typing import List, Tuple, Dict, Optional, Type, Any
import asyncio import asyncio
import tempfile import tempfile
from sklearn.datasets import make_classification from sklearn.datasets import make_classification
@ -953,6 +953,73 @@ class TestWithDask:
# Subtract the on disk resource from each worker # Subtract the on disk resource from each worker
assert cnt - n_workers == n_partitions assert cnt - n_workers == n_partitions
def run_shap(self, X: Any, y: Any, params: Dict[str, Any], client: "Client") -> None:
    """Check that predicted contributions add up to the predicted margin.

    A booster is trained for 10 rounds with ``params`` on dask arrays created
    from ``X`` and ``y``.  Predictions are then made on the same data twice,
    once with ``pred_contribs=True`` and once with ``output_margin=True``, and
    the contributions summed over their last axis must match the margin
    (rtol = atol = 1e-5).
    """
    dX = da.from_array(X)
    dy = da.from_array(y)

    dtrain = xgb.dask.DaskDMatrix(client, dX, dy)
    output = xgb.dask.train(client, params, dtrain, num_boost_round=10)
    booster = output['booster']

    dtest = xgb.dask.DaskDMatrix(client, dX, dy)
    contribs = xgb.dask.predict(client, booster, dtest, pred_contribs=True).compute()
    margin = xgb.dask.predict(client, booster, dtest, output_margin=True).compute()

    # Collapse the last axis of the contribution array before comparing.
    summed = np.sum(contribs, axis=-1)
    assert np.allclose(summed, margin, rtol=1e-5, atol=1e-5)
def run_shap_cls_sklearn(self, X: Any, y: Any, client: "Client") -> None:
    """Same contribution-vs-margin check, using the scikit-learn style classifier.

    A ``DaskXGBClassifier`` with default settings is bound to ``client`` and
    fitted on dask arrays created from ``X`` and ``y``.  Its underlying booster
    is then used to predict contributions and margins on the same data, and the
    contributions summed over their last axis must match the margin
    (rtol = atol = 1e-5).
    """
    dX = da.from_array(X)
    dy = da.from_array(y)

    classifier = xgb.dask.DaskXGBClassifier()
    classifier.client = client
    classifier.fit(dX, dy)
    booster = classifier.get_booster()

    dtest = xgb.dask.DaskDMatrix(client, dX, dy)
    contribs = xgb.dask.predict(client, booster, dtest, pred_contribs=True).compute()
    margin = xgb.dask.predict(client, booster, dtest, output_margin=True).compute()

    # Collapse the last axis of the contribution array before comparing.
    summed = np.sum(contribs, axis=-1)
    assert np.allclose(summed, margin, rtol=1e-5, atol=1e-5)
def test_shap(self, client: "Client") -> None:
    """Run the contribution-vs-margin check on a regression and a 10-class task.

    The Boston data is used with ``reg:squarederror``; the digits data is used
    with ``multi:softmax``, with ``multi:softprob`` and finally through the
    scikit-learn style classifier.
    """
    from sklearn.datasets import load_boston, load_digits

    X, y = load_boston(return_X_y=True)
    # Annotated explicitly: without it mypy infers Dict[str, str] from this
    # first literal and rejects the later rebinding that holds an int value.
    params: Dict[str, Any] = {'objective': 'reg:squarederror'}
    self.run_shap(X, y, params, client)

    X, y = load_digits(return_X_y=True)
    params = {'objective': 'multi:softmax', 'num_class': 10}
    self.run_shap(X, y, params, client)

    params = {'objective': 'multi:softprob', 'num_class': 10}
    self.run_shap(X, y, params, client)

    self.run_shap_cls_sklearn(X, y, client)
def run_shap_interactions(
    self,
    X: Any,
    y: Any,
    params: Dict[str, Any],
    client: "Client"
) -> None:
    """Check that predicted interaction values add up to the predicted margin.

    A booster is trained for 10 rounds with ``params`` on dask arrays created
    from ``X`` and ``y``.  Predictions are then made on the same data with
    ``pred_interactions=True`` and with ``output_margin=True``; the interaction
    values summed over their last two axes must match the margin
    (rtol = atol = 1e-5).
    """
    dX = da.from_array(X)
    dy = da.from_array(y)

    dtrain = xgb.dask.DaskDMatrix(client, dX, dy)
    output = xgb.dask.train(client, params, dtrain, num_boost_round=10)
    booster = output['booster']

    dtest = xgb.dask.DaskDMatrix(client, dX, dy)
    interactions = xgb.dask.predict(
        client, booster, dtest, pred_interactions=True
    ).compute()
    margin = xgb.dask.predict(client, booster, dtest, output_margin=True).compute()

    # Collapse the two trailing axes of the interaction array before comparing.
    summed = np.sum(interactions, axis=(-1, -2))
    assert np.allclose(summed, margin, rtol=1e-5, atol=1e-5)
def test_shap_interactions(self, client: "Client") -> None:
    """Run the interaction-vs-margin check on the Boston regression data."""
    from sklearn.datasets import load_boston

    features, target = load_boston(return_X_y=True)
    self.run_shap_interactions(
        features, target, {'objective': 'reg:squarederror'}, client
    )
@pytest.mark.skipif(**tm.no_sklearn()) @pytest.mark.skipif(**tm.no_sklearn())
def test_sklearn_io(self, client: 'Client') -> None: def test_sklearn_io(self, client: 'Client') -> None:
from sklearn.datasets import load_digits from sklearn.datasets import load_digits