Implement categorical data support for SHAP. (#7053)

* Add CPU implementation.
* Update GPUTreeSHAP.
* Add GPU implementation by defining custom split condition.
This commit is contained in:
Jiaming Yuan
2021-06-25 19:02:46 +08:00
committed by GitHub
parent 663136aa08
commit 8fa32fdda2
12 changed files with 287 additions and 50 deletions

View File

@@ -5,7 +5,7 @@ import numpy as np
import xgboost as xgb
from xgboost.compat import PANDAS_INSTALLED
from hypothesis import given, strategies, assume, settings, note
from hypothesis import given, strategies, assume, settings
if PANDAS_INSTALLED:
from hypothesis.extra.pandas import column, data_frames, range_indexes
@@ -275,6 +275,25 @@ class TestGPUPredict:
margin,
1e-3, 1e-3)
def test_shap_categorical(self):
    """Check that SHAP contributions sum to the margin on categorical data.

    A booster is trained with ``gpu_hist`` on a categorical ``DMatrix``.
    Then, for the GPU predictor followed by the CPU predictor, the
    contributions summed over their last axis must match the raw margin
    prediction within a relative tolerance of 1e-3.
    """
    # NOTE(review): the positional arguments to make_categorical are
    # presumably dataset dimensions plus a flag -- confirm against the helper.
    features, labels = tm.make_categorical(100, 20, 7, False)
    dtrain = xgb.DMatrix(features, labels, enable_categorical=True)
    booster = xgb.train({"tree_method": "gpu_hist"}, dtrain, num_boost_round=10)

    # Same check for both predictors; GPU first, then CPU.
    for predictor in ("gpu_predictor", "cpu_predictor"):
        booster.set_param({"predictor": predictor})
        contribs = booster.predict(dtrain, pred_contribs=True)
        raw_margin = booster.predict(dtrain, output_margin=True)
        # Collapse the last axis of the contributions and compare the
        # result with the margin output.
        summed = np.sum(contribs, axis=-1)
        np.testing.assert_allclose(summed, raw_margin, rtol=1e-3)
def test_predict_leaf_basic(self):
gpu_leaf = run_predict_leaf('gpu_predictor')
cpu_leaf = run_predict_leaf('cpu_predictor')