Update GPUTreeShap, add docs (#6281)
* Update GPUTreeShap, add docs * Fix test Co-authored-by: Philip Hyunsu Cho <chohyu01@cs.washington.edu>
This commit is contained in:
parent
b180223d18
commit
f0c3ff313f
@ -1,3 +1,5 @@
|
|||||||
# GPU Acceleration Demo
|
# GPU Acceleration Demo
|
||||||
|
|
||||||
`cover_type.py` shows how to train a model on the [forest cover type](https://archive.ics.uci.edu/ml/datasets/covertype) dataset using GPU acceleration. The forest cover type dataset has 581,012 rows and 54 features, making it time-consuming to process. We compare the run-time and accuracy of the GPU and CPU histogram algorithms.
|
`cover_type.py` shows how to train a model on the [forest cover type](https://archive.ics.uci.edu/ml/datasets/covertype) dataset using GPU acceleration. The forest cover type dataset has 581,012 rows and 54 features, making it time-consuming to process. We compare the run-time and accuracy of the GPU and CPU histogram algorithms.
|
||||||
|
|
||||||
|
`shap.ipynb` demonstrates using GPU acceleration to compute SHAP values for feature importance.
|
||||||
|
|||||||
211
demo/gpu_acceleration/shap.ipynb
Normal file
211
demo/gpu_acceleration/shap.ipynb
Normal file
File diff suppressed because one or more lines are too long
@ -85,6 +85,19 @@ The GPU algorithms currently work with CLI, Python and R packages. See :doc:`/bu
|
|||||||
XGBRegressor(tree_method='gpu_hist', gpu_id=0)
|
XGBRegressor(tree_method='gpu_hist', gpu_id=0)
|
||||||
|
|
||||||
|
|
||||||
|
GPU-Accelerated SHAP values
|
||||||
|
=============================
|
||||||
|
XGBoost makes use of `GPUTreeShap <https://github.com/rapidsai/gputreeshap>`_ as a backend for computing SHAP values when the GPU predictor is selected.
|
||||||
|
|
||||||
|
.. code-block:: python
|
||||||
|
|
||||||
|
model.set_param({"predictor": "gpu_predictor"})
|
||||||
|
shap_values = model.predict(dtrain, pred_contribs=True)
|
||||||
|
shap_interaction_values = model.predict(dtrain, pred_interactions=True)
|
||||||
|
|
||||||
|
See examples `here
|
||||||
|
<https://github.com/dmlc/xgboost/tree/master/demo/gpu_acceleration>`_.
|
||||||
|
|
||||||
Multi-node Multi-GPU Training
|
Multi-node Multi-GPU Training
|
||||||
=============================
|
=============================
|
||||||
XGBoost supports fully distributed GPU training using `Dask <https://dask.org/>`_. For
|
XGBoost supports fully distributed GPU training using `Dask <https://dask.org/>`_. For
|
||||||
|
|||||||
@ -1 +1 @@
|
|||||||
Subproject commit 5f33132d75482338f78cfba562791d8445e157f6
|
Subproject commit 3310a30bb123a49ab12c58e03edc2479512d2f64
|
||||||
@ -671,17 +671,6 @@ class GPUPredictor : public xgboost::Predictor {
|
|||||||
model.learner_model_param->num_output_group);
|
model.learner_model_param->num_output_group);
|
||||||
out_contribs->Fill(0.0f);
|
out_contribs->Fill(0.0f);
|
||||||
auto phis = out_contribs->DeviceSpan();
|
auto phis = out_contribs->DeviceSpan();
|
||||||
p_fmat->Info().base_margin_.SetDevice(generic_param_->gpu_id);
|
|
||||||
const auto margin = p_fmat->Info().base_margin_.ConstDeviceSpan();
|
|
||||||
float base_score = model.learner_model_param->base_score;
|
|
||||||
// Add the base margin term to last column
|
|
||||||
dh::LaunchN(
|
|
||||||
generic_param_->gpu_id,
|
|
||||||
p_fmat->Info().num_row_ * model.learner_model_param->num_output_group,
|
|
||||||
[=] __device__(size_t idx) {
|
|
||||||
phis[(idx + 1) * contributions_columns - 1] =
|
|
||||||
margin.empty() ? base_score : margin[idx];
|
|
||||||
});
|
|
||||||
|
|
||||||
dh::device_vector<gpu_treeshap::PathElement> device_paths;
|
dh::device_vector<gpu_treeshap::PathElement> device_paths;
|
||||||
ExtractPaths(&device_paths, model, real_ntree_limit,
|
ExtractPaths(&device_paths, model, real_ntree_limit,
|
||||||
@ -695,6 +684,17 @@ class GPUPredictor : public xgboost::Predictor {
|
|||||||
X, device_paths.begin(), device_paths.end(), ngroup,
|
X, device_paths.begin(), device_paths.end(), ngroup,
|
||||||
phis.data() + batch.base_rowid * contributions_columns, phis.size());
|
phis.data() + batch.base_rowid * contributions_columns, phis.size());
|
||||||
}
|
}
|
||||||
|
// Add the base margin term to last column
|
||||||
|
p_fmat->Info().base_margin_.SetDevice(generic_param_->gpu_id);
|
||||||
|
const auto margin = p_fmat->Info().base_margin_.ConstDeviceSpan();
|
||||||
|
float base_score = model.learner_model_param->base_score;
|
||||||
|
dh::LaunchN(
|
||||||
|
generic_param_->gpu_id,
|
||||||
|
p_fmat->Info().num_row_ * model.learner_model_param->num_output_group,
|
||||||
|
[=] __device__(size_t idx) {
|
||||||
|
phis[(idx + 1) * contributions_columns - 1] +=
|
||||||
|
margin.empty() ? base_score : margin[idx];
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
void PredictInteractionContributions(DMatrix* p_fmat,
|
void PredictInteractionContributions(DMatrix* p_fmat,
|
||||||
@ -726,21 +726,6 @@ class GPUPredictor : public xgboost::Predictor {
|
|||||||
model.learner_model_param->num_output_group);
|
model.learner_model_param->num_output_group);
|
||||||
out_contribs->Fill(0.0f);
|
out_contribs->Fill(0.0f);
|
||||||
auto phis = out_contribs->DeviceSpan();
|
auto phis = out_contribs->DeviceSpan();
|
||||||
p_fmat->Info().base_margin_.SetDevice(generic_param_->gpu_id);
|
|
||||||
const auto margin = p_fmat->Info().base_margin_.ConstDeviceSpan();
|
|
||||||
float base_score = model.learner_model_param->base_score;
|
|
||||||
// Add the base margin term to last column
|
|
||||||
size_t n_features = model.learner_model_param->num_feature;
|
|
||||||
dh::LaunchN(
|
|
||||||
generic_param_->gpu_id,
|
|
||||||
p_fmat->Info().num_row_ * model.learner_model_param->num_output_group,
|
|
||||||
[=] __device__(size_t idx) {
|
|
||||||
size_t group = idx % ngroup;
|
|
||||||
size_t row_idx = idx / ngroup;
|
|
||||||
phis[gpu_treeshap::IndexPhiInteractions(
|
|
||||||
row_idx, ngroup, group, n_features, n_features, n_features)] =
|
|
||||||
margin.empty() ? base_score : margin[idx];
|
|
||||||
});
|
|
||||||
|
|
||||||
dh::device_vector<gpu_treeshap::PathElement> device_paths;
|
dh::device_vector<gpu_treeshap::PathElement> device_paths;
|
||||||
ExtractPaths(&device_paths, model, real_ntree_limit,
|
ExtractPaths(&device_paths, model, real_ntree_limit,
|
||||||
@ -754,6 +739,21 @@ class GPUPredictor : public xgboost::Predictor {
|
|||||||
X, device_paths.begin(), device_paths.end(), ngroup,
|
X, device_paths.begin(), device_paths.end(), ngroup,
|
||||||
phis.data() + batch.base_rowid * contributions_columns, phis.size());
|
phis.data() + batch.base_rowid * contributions_columns, phis.size());
|
||||||
}
|
}
|
||||||
|
// Add the base margin term to last column
|
||||||
|
p_fmat->Info().base_margin_.SetDevice(generic_param_->gpu_id);
|
||||||
|
const auto margin = p_fmat->Info().base_margin_.ConstDeviceSpan();
|
||||||
|
float base_score = model.learner_model_param->base_score;
|
||||||
|
size_t n_features = model.learner_model_param->num_feature;
|
||||||
|
dh::LaunchN(
|
||||||
|
generic_param_->gpu_id,
|
||||||
|
p_fmat->Info().num_row_ * model.learner_model_param->num_output_group,
|
||||||
|
[=] __device__(size_t idx) {
|
||||||
|
size_t group = idx % ngroup;
|
||||||
|
size_t row_idx = idx / ngroup;
|
||||||
|
phis[gpu_treeshap::IndexPhiInteractions(
|
||||||
|
row_idx, ngroup, group, n_features, n_features, n_features)] +=
|
||||||
|
margin.empty() ? base_score : margin[idx];
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user