[EM] Support SHAP contribution with QDM. (#10724)

- Add GPU support.
- Add external memory support.
- Update the GPU tree shap.
This commit is contained in:
Jiaming Yuan
2024-08-22 05:25:10 +08:00
committed by GitHub
parent cb54374550
commit 142bdc73ec
13 changed files with 274 additions and 159 deletions

View File

@@ -61,6 +61,12 @@ TEST(CpuPredictor, ExternalMemory) {
TestBasic(dmat.get(), &ctx);
}
TEST_P(ShapExternalMemoryTest, CPUPredictor) {
Context ctx;
auto [is_qdm, is_interaction] = this->GetParam();
this->Run(&ctx, is_qdm, is_interaction);
}
TEST(CpuPredictor, InplacePredict) {
bst_idx_t constexpr kRows{128};
bst_feature_t constexpr kCols{64};
@@ -110,7 +116,7 @@ void TestUpdatePredictionCache(bool use_subsampling) {
}
gbm->Configure(args);
auto dmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix(true, true, kClasses);
auto dmat = RandomDataGenerator(kRows, kCols, 0).Classes(kClasses).GenerateDMatrix(true);
linalg::Matrix<GradientPair> gpair({kRows, kClasses}, ctx.Device());
auto h_gpair = gpair.HostView();
@@ -145,7 +151,7 @@ TEST(CPUPredictor, GHistIndexTraining) {
auto adapter = data::ArrayAdapter(columnar.c_str());
std::shared_ptr<DMatrix> p_full{
DMatrix::Create(&adapter, std::numeric_limits<float>::quiet_NaN(), 1)};
TestTrainingPrediction(&ctx, kRows, kBins, p_full, p_hist, true);
TestTrainingPrediction(&ctx, kRows, kBins, p_full, p_hist);
}
TEST(CPUPredictor, CategoricalPrediction) {

View File

@@ -1,5 +1,5 @@
/**
* Copyright 2017-2023, XGBoost contributors
* Copyright 2017-2024, XGBoost contributors
*/
#include <gtest/gtest.h>
#include <xgboost/c_api.h>
@@ -17,7 +17,6 @@
#include "test_predictor.h"
namespace xgboost::predictor {
TEST(GPUPredictor, Basic) {
auto cpu_lparam = MakeCUDACtx(-1);
auto gpu_lparam = MakeCUDACtx(0);
@@ -269,10 +268,9 @@ TEST(GPUPredictor, Shap) {
trees[0]->ExpandNode(0, 0, 0.5, true, 1.0, -1.0, 1.0, 0.0, 5.0, 2.0, 3.0);
model.CommitModelGroup(std::move(trees), 0);
auto gpu_lparam = MakeCUDACtx(0);
auto cpu_lparam = MakeCUDACtx(-1);
std::unique_ptr<Predictor> gpu_predictor = std::unique_ptr<Predictor>(
Predictor::Create("gpu_predictor", &gpu_lparam));
std::unique_ptr<Predictor> gpu_predictor =
std::unique_ptr<Predictor>(Predictor::Create("gpu_predictor", &ctx));
std::unique_ptr<Predictor> cpu_predictor = std::unique_ptr<Predictor>(
Predictor::Create("cpu_predictor", &cpu_lparam));
gpu_predictor->Configure({});
@@ -289,6 +287,12 @@ TEST(GPUPredictor, Shap) {
}
}
TEST_P(ShapExternalMemoryTest, GPUPredictor) {
auto ctx = MakeCUDACtx(0);
auto [is_qdm, is_interaction] = this->GetParam();
this->Run(&ctx, is_qdm, is_interaction);
}
TEST(GPUPredictor, IterationRange) {
auto ctx = MakeCUDACtx(0);
TestIterationRange(&ctx);

View File

@@ -4,15 +4,16 @@
#include "test_predictor.h"
#include <gtest/gtest.h>
#include <xgboost/context.h> // for Context
#include <xgboost/data.h> // for DMatrix, BatchIterator, BatchSet, MetaInfo
#include <xgboost/host_device_vector.h> // for HostDeviceVector
#include <xgboost/predictor.h> // for PredictionCacheEntry, Predictor, Predic...
#include <xgboost/string_view.h> // for StringView
#include <xgboost/context.h> // for Context
#include <xgboost/data.h> // for DMatrix, BatchIterator, BatchSet, MetaInfo
#include <xgboost/host_device_vector.h> // for HostDeviceVector
#include <xgboost/json.h> // for Json
#include <xgboost/predictor.h> // for PredictionCacheEntry, Predictor, Predic...
#include <xgboost/string_view.h> // for StringView
#include <limits> // for numeric_limits
#include <memory> // for shared_ptr
#include <unordered_map> // for unordered_map
#include <limits> // for numeric_limits
#include <memory> // for shared_ptr
#include <unordered_map> // for unordered_map
#include "../../../src/common/bitfield.h" // for LBitField32
#include "../../../src/data/iterative_dmatrix.h" // for IterativeDMatrix
@@ -26,7 +27,6 @@
#include "xgboost/tree_model.h" // for RegTree
namespace xgboost {
void TestBasic(DMatrix* dmat, Context const *ctx) {
auto predictor = std::unique_ptr<Predictor>(CreatePredictorForTest(ctx));
@@ -118,8 +118,7 @@ TEST(Predictor, PredictionCache) {
}
void TestTrainingPrediction(Context const *ctx, size_t rows, size_t bins,
std::shared_ptr<DMatrix> p_full, std::shared_ptr<DMatrix> p_hist,
bool check_contribs) {
std::shared_ptr<DMatrix> p_full, std::shared_ptr<DMatrix> p_hist) {
size_t constexpr kCols = 16;
size_t constexpr kClasses = 3;
size_t constexpr kIters = 3;
@@ -163,34 +162,32 @@ void TestTrainingPrediction(Context const *ctx, size_t rows, size_t bins,
EXPECT_NEAR(from_hist.ConstHostVector()[i], from_full.ConstHostVector()[i], kRtEps);
}
if (check_contribs) {
// Contributions
HostDeviceVector<float> from_full_contribs;
learner->Predict(p_full, false, &from_full_contribs, 0, 0, false, false, true);
HostDeviceVector<float> from_hist_contribs;
learner->Predict(p_hist, false, &from_hist_contribs, 0, 0, false, false, true);
for (size_t i = 0; i < from_full_contribs.ConstHostVector().size(); ++i) {
EXPECT_NEAR(from_hist_contribs.ConstHostVector()[i],
from_full_contribs.ConstHostVector()[i], kRtEps);
}
// Contributions
HostDeviceVector<float> from_full_contribs;
learner->Predict(p_full, false, &from_full_contribs, 0, 0, false, false, true);
HostDeviceVector<float> from_hist_contribs;
learner->Predict(p_hist, false, &from_hist_contribs, 0, 0, false, false, true);
for (size_t i = 0; i < from_full_contribs.ConstHostVector().size(); ++i) {
EXPECT_NEAR(from_hist_contribs.ConstHostVector()[i], from_full_contribs.ConstHostVector()[i],
kRtEps);
}
// Contributions (approximate method)
HostDeviceVector<float> from_full_approx_contribs;
learner->Predict(p_full, false, &from_full_approx_contribs, 0, 0, false, false, false, true);
HostDeviceVector<float> from_hist_approx_contribs;
learner->Predict(p_hist, false, &from_hist_approx_contribs, 0, 0, false, false, false, true);
for (size_t i = 0; i < from_full_approx_contribs.ConstHostVector().size(); ++i) {
EXPECT_NEAR(from_hist_approx_contribs.ConstHostVector()[i],
from_full_approx_contribs.ConstHostVector()[i], kRtEps);
}
// Contributions (approximate method)
HostDeviceVector<float> from_full_approx_contribs;
learner->Predict(p_full, false, &from_full_approx_contribs, 0, 0, false, false, false, true);
HostDeviceVector<float> from_hist_approx_contribs;
learner->Predict(p_hist, false, &from_hist_approx_contribs, 0, 0, false, false, false, true);
for (size_t i = 0; i < from_full_approx_contribs.ConstHostVector().size(); ++i) {
EXPECT_NEAR(from_hist_approx_contribs.ConstHostVector()[i],
from_full_approx_contribs.ConstHostVector()[i], kRtEps);
}
}
void TestInplacePrediction(Context const *ctx, std::shared_ptr<DMatrix> x, bst_idx_t rows,
bst_feature_t cols) {
std::size_t constexpr kClasses { 4 };
auto gen = RandomDataGenerator{rows, cols, 0.5}.Device(ctx->Device());
std::shared_ptr<DMatrix> m = gen.GenerateDMatrix(true, false, kClasses);
auto gen = RandomDataGenerator{rows, cols, 0.5}.Device(ctx->Device()).Classes(kClasses);
std::shared_ptr<DMatrix> m = gen.GenerateDMatrix(true);
std::unique_ptr<Learner> learner {
Learner::Create({m})
@@ -444,7 +441,8 @@ void TestIterationRange(Context const* ctx) {
size_t constexpr kRows = 1000, kCols = 20, kClasses = 4, kForest = 3, kIters = 10;
auto dmat = RandomDataGenerator(kRows, kCols, 0)
.Device(ctx->Device())
.GenerateDMatrix(true, true, kClasses);
.Classes(kClasses)
.GenerateDMatrix(true);
auto learner = LearnerForTest(ctx, dmat, kIters, kForest);
bool bound = false;
@@ -515,7 +513,7 @@ void VerifyIterationRangeColumnSplit(bool use_gpu, Json const &ranged_model,
ctx.UpdateAllowUnknown(
Args{{"nthread", std::to_string(n_threads)}, {"device", ctx.DeviceName()}});
auto dmat = RandomDataGenerator(rows, cols, 0).GenerateDMatrix(true, true, classes);
auto dmat = RandomDataGenerator(rows, cols, 0).Classes(classes).GenerateDMatrix(true);
std::shared_ptr<DMatrix> Xy{dmat->SliceCol(world_size, rank)};
std::unique_ptr<Learner> learner{Learner::Create({Xy})};
@@ -566,7 +564,7 @@ void VerifyIterationRangeColumnSplit(bool use_gpu, Json const &ranged_model,
void TestIterationRangeColumnSplit(int world_size, bool use_gpu) {
std::size_t constexpr kRows = 1000, kCols = 20, kClasses = 4, kForest = 3, kIters = 10;
auto dmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix(true, true, kClasses);
auto dmat = RandomDataGenerator(kRows, kCols, 0).Classes(kClasses).GenerateDMatrix(true);
Context ctx;
if (use_gpu) {
ctx = MakeCUDACtx(0);
@@ -835,4 +833,69 @@ void TestVectorLeafPrediction(Context const *ctx) {
data.HostVector().assign(data.Size(), model.trees.front()->SplitCond(RegTree::kRoot) - 1.0);
run_test(1.5, &data);
}
void ShapExternalMemoryTest::Run(Context const *ctx, bool is_qdm, bool is_interaction) {
bst_idx_t n_samples{2048};
bst_feature_t n_features{16};
bst_target_t n_classes{3};
bst_bin_t max_bin{64};
auto create_pfmat = [&](RandomDataGenerator &rng) {
if (is_qdm) {
return rng.Bins(max_bin).GenerateExtMemQuantileDMatrix("temp", true);
}
return rng.GenerateSparsePageDMatrix("temp", true);
};
auto p_fmat = create_pfmat(RandomDataGenerator(n_samples, n_features, 0)
.Batches(1)
.Device(ctx->Device())
.Classes(n_classes));
std::unique_ptr<Learner> learner{Learner::Create({p_fmat})};
learner->SetParam("device", ctx->DeviceName());
learner->SetParam("base_score", "0.5");
learner->SetParam("num_parallel_tree", "3");
learner->SetParam("max_bin", std::to_string(max_bin));
for (std::int32_t i = 0; i < 4; ++i) {
learner->UpdateOneIter(i, p_fmat);
}
Json model{Object{}};
learner->SaveModel(&model);
auto j_booster = model["learner"]["gradient_booster"]["model"];
auto model_param = MakeMP(n_features, 0.0, n_classes, ctx->Device());
gbm::GBTreeModel gbtree{&model_param, ctx};
gbtree.LoadModel(j_booster);
std::unique_ptr<Predictor> predictor{
Predictor::Create(ctx->IsCPU() ? "cpu_predictor" : "gpu_predictor", ctx)};
predictor->Configure({});
HostDeviceVector<float> contrib;
if (is_interaction) {
predictor->PredictInteractionContributions(p_fmat.get(), &contrib, gbtree);
} else {
predictor->PredictContribution(p_fmat.get(), &contrib, gbtree);
}
auto p_fmat_ext = create_pfmat(RandomDataGenerator(n_samples, n_features, 0)
.Batches(4)
.Device(ctx->Device())
.Classes(n_classes));
HostDeviceVector<float> contrib_ext;
if (is_interaction) {
predictor->PredictInteractionContributions(p_fmat_ext.get(), &contrib_ext, gbtree);
} else {
predictor->PredictContribution(p_fmat_ext.get(), &contrib_ext, gbtree);
}
ASSERT_EQ(contrib_ext.Size(), contrib.Size());
auto h_contrib = contrib.ConstHostSpan();
auto h_contrib_ext = contrib_ext.ConstHostSpan();
for (std::size_t i = 0; i < h_contrib.size(); ++i) {
ASSERT_EQ(h_contrib[i], h_contrib_ext[i]);
}
}
INSTANTIATE_TEST_SUITE_P(Predictor, ShapExternalMemoryTest,
::testing::Combine(::testing::Bool(), ::testing::Bool()));
} // namespace xgboost

View File

@@ -89,8 +89,7 @@ void TestBasic(DMatrix* dmat, Context const * ctx);
// p_full and p_hist should come from the same data set.
void TestTrainingPrediction(Context const* ctx, size_t rows, size_t bins,
std::shared_ptr<DMatrix> p_full, std::shared_ptr<DMatrix> p_hist,
bool check_contribs = false);
std::shared_ptr<DMatrix> p_full, std::shared_ptr<DMatrix> p_hist);
void TestInplacePrediction(Context const* ctx, std::shared_ptr<DMatrix> x, bst_idx_t rows,
bst_feature_t cols);
@@ -114,6 +113,11 @@ void TestSparsePrediction(Context const* ctx, float sparsity);
void TestSparsePredictionColumnSplit(int world_size, bool use_gpu, float sparsity);
void TestVectorLeafPrediction(Context const* ctx);
class ShapExternalMemoryTest : public ::testing::TestWithParam<std::tuple<bool, bool>> {
public:
void Run(Context const* ctx, bool is_qdm, bool is_interaction);
};
} // namespace xgboost
#endif // XGBOOST_TEST_PREDICTOR_H_