Make sure metrics work with federated learning (#9037)

This commit is contained in:
Rong Ou
2023-04-19 00:39:11 -07:00
committed by GitHub
parent ef13dd31b1
commit 42d100de18
11 changed files with 451 additions and 152 deletions

View File

@@ -189,7 +189,9 @@ double GetMultiMetricEval(xgboost::Metric* metric,
info.weights_.HostVector() = weights;
info.group_ptr_ = groups;
info.data_split_mode = data_split_mode;
if (info.IsVerticalFederated() && xgboost::collective::GetRank() != 0) {
info.labels.Reshape(0);
}
return metric->Evaluate(preds, p_fmat);
}

View File

@@ -2,109 +2,13 @@
* Copyright (c) by Contributors 2020
*/
#include <gtest/gtest.h>
#include <cmath>
#include "test_survival_metric.h"
#include "xgboost/metric.h"
#include "../helpers.h"
#include "../../../src/common/survival_util.h"
/** Tests for Survival metrics that should run both on CPU and GPU **/
namespace xgboost {
namespace common {
namespace {
/**
 * Check that a metric is deterministic: evaluate it once, then evaluate it eight more times
 * on the same inputs and assert that every result equals the first one.
 *
 * The inputs are 2048 predictions drawn from SimpleRealUniformDistribution<float>{0, 1}
 * driven by a default-constructed SimpleLCG; every lower label bound is 1 and every upper
 * label bound is 10.
 *
 * \param name   metric name passed to Metric::Create
 * \param device device ordinal passed to CreateEmptyGenericParam
 */
inline void CheckDeterministicMetricElementWise(StringView name, int32_t device) {
  size_t const kSamples = 2048;
  size_t const kRepeats = 8;

  auto ctx = CreateEmptyGenericParam(device);
  std::unique_ptr<Metric> metric{Metric::Create(name.c_str(), &ctx)};
  metric->Configure(Args{});

  HostDeviceVector<float> predts;
  auto p_fmat = EmptyDMatrix();
  MetaInfo& info = p_fmat->Info();

  // Fill the predictions, in index order, from the pseudo-random stream.
  auto& h_predts = predts.HostVector();
  SimpleLCG lcg;
  SimpleRealUniformDistribution<float> dist{0.0f, 1.0f};
  h_predts.resize(kSamples);
  for (auto& value : h_predts) {
    value = dist(&lcg);
  }

  // Every sample gets the same label interval.
  auto& h_upper = info.labels_upper_bound_.HostVector();
  auto& h_lower = info.labels_lower_bound_.HostVector();
  h_lower.resize(kSamples);
  h_upper.resize(kSamples);
  for (auto& bound : h_lower) {
    bound = 1;
  }
  for (auto& bound : h_upper) {
    bound = 10;
  }

  // The first evaluation is the reference; later evaluations must reproduce it exactly.
  auto const expected = metric->Evaluate(predts, p_fmat);
  for (size_t round = 0; round < kRepeats; ++round) {
    ASSERT_EQ(metric->Evaluate(predts, p_fmat), expected);
  }
}
/**
 * Check the aggregate output of the "aft-nloglik" metric over a small four-row data set.
 * This is unlike the AFTLoss.* tests, which verify metric values over individual data points.
 *
 * The metric is evaluated for the "normal", "logistic" and "extreme" loss distributions
 * (scale "1.0") and compared with a reference value using an absolute tolerance of 1e-4.
 *
 * \param data_split_mode value stored in the DMatrix meta info before evaluating
 */
void VerifyAFTNegLogLik(DataSplitMode data_split_mode = DataSplitMode::kRow) {
  struct TestCase {
    std::string dist_type;
    bst_float reference_value;
  };
  size_t const kRows = 4;
  bst_float const kInf = std::numeric_limits<bst_float>::infinity();

  auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX);

  auto p_fmat = EmptyDMatrix();
  MetaInfo& info = p_fmat->Info();
  info.num_row_ = kRows;
  info.labels_lower_bound_.HostVector() = {100.0f, 0.0f, 60.0f, 16.0f};
  info.labels_upper_bound_.HostVector() = {100.0f, 20.0f, kInf, 200.0f};
  info.weights_.HostVector() = std::vector<bst_float>();
  info.data_split_mode = data_split_mode;

  // Every row shares the same prediction.
  HostDeviceVector<bst_float> preds(kRows, std::log(64));

  std::vector<TestCase> const cases{
      {"normal", 2.1508f}, {"logistic", 2.1804f}, {"extreme", 2.0706f}};
  for (auto const& tc : cases) {
    // A fresh metric object is created and configured for each distribution.
    std::unique_ptr<Metric> metric(Metric::Create("aft-nloglik", &ctx));
    Args const config{{"aft_loss_distribution", tc.dist_type},
                      {"aft_loss_distribution_scale", "1.0"}};
    metric->Configure(config);
    EXPECT_NEAR(metric->Evaluate(preds, p_fmat), tc.reference_value, 1e-4);
  }
}
/**
 * Check the "interval-regression-accuracy" metric on a four-row data set while individual
 * label bounds are changed step by step, then run the determinism check for the same metric.
 *
 * Every prediction is log(60). The expected values are consistent with the metric being the
 * fraction of rows whose [lower, upper] interval contains 60 -- NOTE(review): inferred from
 * the expectations below, confirm against the metric implementation.
 *
 * \param data_split_mode value stored in the DMatrix meta info before evaluating
 */
void VerifyIntervalRegressionAccuracy(DataSplitMode data_split_mode = DataSplitMode::kRow) {
  bst_float const kInf = std::numeric_limits<bst_float>::infinity();

  auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX);
  auto p_fmat = EmptyDMatrix();
  MetaInfo& info = p_fmat->Info();

  // The host vector is fetched anew for every write, exactly as the straight-line version
  // did; no host-vector reference is kept alive across an Evaluate call.
  auto set_lower = [&info](size_t row, bst_float value) {
    info.labels_lower_bound_.HostVector()[row] = value;
  };
  auto set_upper = [&info](size_t row, bst_float value) {
    info.labels_upper_bound_.HostVector()[row] = value;
  };

  info.num_row_ = 4;
  info.labels_lower_bound_.HostVector() = {20.0f, 0.0f, 60.0f, 16.0f};
  info.labels_upper_bound_.HostVector() = {80.0f, 20.0f, 80.0f, 200.0f};
  info.weights_.HostVector() = std::vector<bst_float>();
  info.data_split_mode = data_split_mode;

  HostDeviceVector<bst_float> preds(4, std::log(60.0f));
  std::unique_ptr<Metric> metric(Metric::Create("interval-regression-accuracy", &ctx));

  // Initial bounds.
  EXPECT_FLOAT_EQ(metric->Evaluate(preds, p_fmat), 0.75f);

  // Row 2: raise the lower bound to 70.
  set_lower(2, 70.0f);
  EXPECT_FLOAT_EQ(metric->Evaluate(preds, p_fmat), 0.50f);

  // Rows 2 and 3: open-ended upper bounds leave the result unchanged.
  set_upper(2, kInf);
  EXPECT_FLOAT_EQ(metric->Evaluate(preds, p_fmat), 0.50f);
  set_upper(3, kInf);
  EXPECT_FLOAT_EQ(metric->Evaluate(preds, p_fmat), 0.50f);

  // Row 0: raise the lower bound to 70.
  set_lower(0, 70.0f);
  EXPECT_FLOAT_EQ(metric->Evaluate(preds, p_fmat), 0.25f);

  CheckDeterministicMetricElementWise(StringView{"interval-regression-accuracy"}, GPUIDX);
}
} // anonymous namespace
// Runs the AFT negative log-likelihood check with its default argument (DataSplitMode::kRow).
TEST(Metric, DeclareUnifiedTest(AFTNegLogLik)) {
  VerifyAFTNegLogLik();
}
TEST_F(DeclareUnifiedDistributedTest(MetricTest), AFTNegLogLikRowSplit) {
@@ -140,6 +44,5 @@ TEST(AFTNegLogLikMetric, DeclareUnifiedTest(Configuration)) {
CheckDeterministicMetricElementWise(StringView{"aft-nloglik"}, GPUIDX);
}
} // namespace common
} // namespace xgboost

View File

@@ -0,0 +1,107 @@
/**
* Copyright 2020-2023 by XGBoost Contributors
*/
#pragma once
#include <gtest/gtest.h>
#include <cmath>
#include "../../../src/common/survival_util.h"
#include "../helpers.h"
#include "xgboost/metric.h"
namespace xgboost {
namespace common {
/**
 * Check that a metric is deterministic: evaluate it once, then evaluate it eight more times
 * on the same inputs and assert that every result equals the first one.
 *
 * The inputs are 2048 predictions drawn from SimpleRealUniformDistribution<float>{0, 1}
 * driven by a default-constructed SimpleLCG; every lower label bound is 1 and every upper
 * label bound is 10.
 *
 * \param name   metric name passed to Metric::Create
 * \param device device ordinal passed to CreateEmptyGenericParam
 */
inline void CheckDeterministicMetricElementWise(StringView name, int32_t device) {
  size_t const kSamples = 2048;
  size_t const kRepeats = 8;

  auto ctx = CreateEmptyGenericParam(device);
  std::unique_ptr<Metric> metric{Metric::Create(name.c_str(), &ctx)};
  metric->Configure(Args{});

  HostDeviceVector<float> predts;
  auto p_fmat = EmptyDMatrix();
  MetaInfo& info = p_fmat->Info();

  // Fill the predictions, in index order, from the pseudo-random stream.
  auto& h_predts = predts.HostVector();
  SimpleLCG lcg;
  SimpleRealUniformDistribution<float> dist{0.0f, 1.0f};
  h_predts.resize(kSamples);
  for (auto& value : h_predts) {
    value = dist(&lcg);
  }

  // Every sample gets the same label interval.
  auto& h_upper = info.labels_upper_bound_.HostVector();
  auto& h_lower = info.labels_lower_bound_.HostVector();
  h_lower.resize(kSamples);
  h_upper.resize(kSamples);
  for (auto& bound : h_lower) {
    bound = 1;
  }
  for (auto& bound : h_upper) {
    bound = 10;
  }

  // The first evaluation is the reference; later evaluations must reproduce it exactly.
  auto const expected = metric->Evaluate(predts, p_fmat);
  for (size_t round = 0; round < kRepeats; ++round) {
    ASSERT_EQ(metric->Evaluate(predts, p_fmat), expected);
  }
}
/**
 * Check the aggregate output of the "aft-nloglik" metric over a small four-row data set.
 * This is unlike the AFTLoss.* tests, which verify metric values over individual data points.
 *
 * The metric is evaluated for the "normal", "logistic" and "extreme" loss distributions
 * (scale "1.0") and compared with a reference value using an absolute tolerance of 1e-4.
 *
 * \param data_split_mode value stored in the DMatrix meta info before evaluating
 */
inline void VerifyAFTNegLogLik(DataSplitMode data_split_mode = DataSplitMode::kRow) {
  struct TestCase {
    std::string dist_type;
    bst_float reference_value;
  };
  size_t const kRows = 4;
  bst_float const kInf = std::numeric_limits<bst_float>::infinity();

  auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX);

  auto p_fmat = EmptyDMatrix();
  MetaInfo& info = p_fmat->Info();
  info.num_row_ = kRows;
  info.labels_lower_bound_.HostVector() = {100.0f, 0.0f, 60.0f, 16.0f};
  info.labels_upper_bound_.HostVector() = {100.0f, 20.0f, kInf, 200.0f};
  info.weights_.HostVector() = std::vector<bst_float>();
  info.data_split_mode = data_split_mode;

  // Every row shares the same prediction.
  HostDeviceVector<bst_float> preds(kRows, std::log(64));

  std::vector<TestCase> const cases{
      {"normal", 2.1508f}, {"logistic", 2.1804f}, {"extreme", 2.0706f}};
  for (auto const& tc : cases) {
    // A fresh metric object is created and configured for each distribution.
    std::unique_ptr<Metric> metric(Metric::Create("aft-nloglik", &ctx));
    Args const config{{"aft_loss_distribution", tc.dist_type},
                      {"aft_loss_distribution_scale", "1.0"}};
    metric->Configure(config);
    EXPECT_NEAR(metric->Evaluate(preds, p_fmat), tc.reference_value, 1e-4);
  }
}
/**
 * Check the "interval-regression-accuracy" metric on a four-row data set while individual
 * label bounds are changed step by step, then run the determinism check for the same metric.
 *
 * Every prediction is log(60). The expected values are consistent with the metric being the
 * fraction of rows whose [lower, upper] interval contains 60 -- NOTE(review): inferred from
 * the expectations below, confirm against the metric implementation.
 *
 * \param data_split_mode value stored in the DMatrix meta info before evaluating
 */
inline void VerifyIntervalRegressionAccuracy(DataSplitMode data_split_mode = DataSplitMode::kRow) {
  bst_float const kInf = std::numeric_limits<bst_float>::infinity();

  auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX);
  auto p_fmat = EmptyDMatrix();
  MetaInfo& info = p_fmat->Info();

  // The host vector is fetched anew for every write, exactly as the straight-line version
  // did; no host-vector reference is kept alive across an Evaluate call.
  auto set_lower = [&info](size_t row, bst_float value) {
    info.labels_lower_bound_.HostVector()[row] = value;
  };
  auto set_upper = [&info](size_t row, bst_float value) {
    info.labels_upper_bound_.HostVector()[row] = value;
  };

  info.num_row_ = 4;
  info.labels_lower_bound_.HostVector() = {20.0f, 0.0f, 60.0f, 16.0f};
  info.labels_upper_bound_.HostVector() = {80.0f, 20.0f, 80.0f, 200.0f};
  info.weights_.HostVector() = std::vector<bst_float>();
  info.data_split_mode = data_split_mode;

  HostDeviceVector<bst_float> preds(4, std::log(60.0f));
  std::unique_ptr<Metric> metric(Metric::Create("interval-regression-accuracy", &ctx));

  // Initial bounds.
  EXPECT_FLOAT_EQ(metric->Evaluate(preds, p_fmat), 0.75f);

  // Row 2: raise the lower bound to 70.
  set_lower(2, 70.0f);
  EXPECT_FLOAT_EQ(metric->Evaluate(preds, p_fmat), 0.50f);

  // Rows 2 and 3: open-ended upper bounds leave the result unchanged.
  set_upper(2, kInf);
  EXPECT_FLOAT_EQ(metric->Evaluate(preds, p_fmat), 0.50f);
  set_upper(3, kInf);
  EXPECT_FLOAT_EQ(metric->Evaluate(preds, p_fmat), 0.50f);

  // Row 0: raise the lower bound to 70.
  set_lower(0, 70.0f);
  EXPECT_FLOAT_EQ(metric->Evaluate(preds, p_fmat), 0.25f);

  CheckDeterministicMetricElementWise(StringView{"interval-regression-accuracy"}, GPUIDX);
}
} // namespace common
} // namespace xgboost

View File

@@ -65,7 +65,7 @@ class BaseFederatedTest : public ::testing::Test {
void TearDown() override { server_.reset(nullptr); }
static int const kWorldSize{3};
static int constexpr kWorldSize{3};
std::unique_ptr<ServerForTest> server_;
};

View File

@@ -70,7 +70,7 @@ void VerifyObjective(size_t rows, size_t cols, float expected_base_score, Json e
class FederatedLearnerTest : public ::testing::TestWithParam<std::string> {
std::unique_ptr<ServerForTest> server_;
static int const kWorldSize{3};
static int constexpr kWorldSize{3};
protected:
void SetUp() override { server_ = std::make_unique<ServerForTest>(kWorldSize); }

View File

@@ -0,0 +1,243 @@
/*!
* Copyright 2023 XGBoost contributors
*/
#include <gtest/gtest.h>
#include "../metric/test_auc.h"
#include "../metric/test_elementwise_metric.h"
#include "../metric/test_multiclass_metric.h"
#include "../metric/test_rank_metric.h"
#include "../metric/test_survival_metric.h"
#include "helpers.h"
namespace {
// Fixture used by every TEST_F case in this file. It adds no members of its own; everything
// the tests use (kWorldSize, server_) comes from xgboost::BaseFederatedTest.
class FederatedMetricTest : public xgboost::BaseFederatedTest {};
} // anonymous namespace
// Federated variants of the metric tests.
//
// Every case passes four things to RunWithFederatedCommunicator: kWorldSize, the address of
// the fixture's test server, a pointer to a Verify* function, and the DataSplitMode to use.
// Each metric is covered twice: once with DataSplitMode::kRow and once with
// DataSplitMode::kCol.
// NOTE(review): the Verify* functions are presumably declared in the ../metric/test_*.h
// headers included by this file -- confirm.
namespace xgboost {
namespace metric {
// --- AUC verifiers: binary, multi-class, ranking ---
TEST_F(FederatedMetricTest, BinaryAUCRowSplit) {
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyBinaryAUC,
DataSplitMode::kRow);
}
TEST_F(FederatedMetricTest, BinaryAUCColumnSplit) {
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyBinaryAUC,
DataSplitMode::kCol);
}
TEST_F(FederatedMetricTest, MultiClassAUCRowSplit) {
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyMultiClassAUC,
DataSplitMode::kRow);
}
TEST_F(FederatedMetricTest, MultiClassAUCColumnSplit) {
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyMultiClassAUC,
DataSplitMode::kCol);
}
TEST_F(FederatedMetricTest, RankingAUCRowSplit) {
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyRankingAUC,
DataSplitMode::kRow);
}
TEST_F(FederatedMetricTest, RankingAUCColumnSplit) {
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyRankingAUC,
DataSplitMode::kCol);
}
// --- PR-AUC verifiers: binary, multi-class, ranking ---
TEST_F(FederatedMetricTest, PRAUCRowSplit) {
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyPRAUC, DataSplitMode::kRow);
}
TEST_F(FederatedMetricTest, PRAUCColumnSplit) {
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyPRAUC, DataSplitMode::kCol);
}
TEST_F(FederatedMetricTest, MultiClassPRAUCRowSplit) {
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyMultiClassPRAUC,
DataSplitMode::kRow);
}
TEST_F(FederatedMetricTest, MultiClassPRAUCColumnSplit) {
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyMultiClassPRAUC,
DataSplitMode::kCol);
}
TEST_F(FederatedMetricTest, RankingPRAUCRowSplit) {
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyRankingPRAUC,
DataSplitMode::kRow);
}
TEST_F(FederatedMetricTest, RankingPRAUCColumnSplit) {
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyRankingPRAUC,
DataSplitMode::kCol);
}
// --- RMSE, RMSLE, MAE, MAPE, MPHE, log-loss, error, Poisson neg-log-lik, multi-RMSE and
// --- quantile verifiers ---
TEST_F(FederatedMetricTest, RMSERowSplit) {
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyRMSE, DataSplitMode::kRow);
}
TEST_F(FederatedMetricTest, RMSEColumnSplit) {
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyRMSE, DataSplitMode::kCol);
}
TEST_F(FederatedMetricTest, RMSLERowSplit) {
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyRMSLE, DataSplitMode::kRow);
}
TEST_F(FederatedMetricTest, RMSLEColumnSplit) {
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyRMSLE, DataSplitMode::kCol);
}
TEST_F(FederatedMetricTest, MAERowSplit) {
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyMAE, DataSplitMode::kRow);
}
TEST_F(FederatedMetricTest, MAEColumnSplit) {
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyMAE, DataSplitMode::kCol);
}
TEST_F(FederatedMetricTest, MAPERowSplit) {
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyMAPE, DataSplitMode::kRow);
}
TEST_F(FederatedMetricTest, MAPEColumnSplit) {
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyMAPE, DataSplitMode::kCol);
}
TEST_F(FederatedMetricTest, MPHERowSplit) {
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyMPHE, DataSplitMode::kRow);
}
TEST_F(FederatedMetricTest, MPHEColumnSplit) {
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyMPHE, DataSplitMode::kCol);
}
TEST_F(FederatedMetricTest, LogLossRowSplit) {
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyLogLoss, DataSplitMode::kRow);
}
TEST_F(FederatedMetricTest, LogLossColumnSplit) {
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyLogLoss, DataSplitMode::kCol);
}
TEST_F(FederatedMetricTest, ErrorRowSplit) {
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyError, DataSplitMode::kRow);
}
TEST_F(FederatedMetricTest, ErrorColumnSplit) {
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyError, DataSplitMode::kCol);
}
TEST_F(FederatedMetricTest, PoissonNegLogLikRowSplit) {
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyPoissonNegLogLik,
DataSplitMode::kRow);
}
TEST_F(FederatedMetricTest, PoissonNegLogLikColumnSplit) {
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyPoissonNegLogLik,
DataSplitMode::kCol);
}
TEST_F(FederatedMetricTest, MultiRMSERowSplit) {
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyMultiRMSE,
DataSplitMode::kRow);
}
TEST_F(FederatedMetricTest, MultiRMSEColumnSplit) {
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyMultiRMSE,
DataSplitMode::kCol);
}
TEST_F(FederatedMetricTest, QuantileRowSplit) {
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyQuantile,
DataSplitMode::kRow);
}
TEST_F(FederatedMetricTest, QuantileColumnSplit) {
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyQuantile,
DataSplitMode::kCol);
}
// --- Multi-class error and log-loss verifiers ---
TEST_F(FederatedMetricTest, MultiClassErrorRowSplit) {
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyMultiClassError,
DataSplitMode::kRow);
}
TEST_F(FederatedMetricTest, MultiClassErrorColumnSplit) {
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyMultiClassError,
DataSplitMode::kCol);
}
TEST_F(FederatedMetricTest, MultiClassLogLossRowSplit) {
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyMultiClassLogLoss,
DataSplitMode::kRow);
}
TEST_F(FederatedMetricTest, MultiClassLogLossColumnSplit) {
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyMultiClassLogLoss,
DataSplitMode::kCol);
}
// --- Precision, NDCG, MAP and NDCG-with-exponential-gain verifiers ---
TEST_F(FederatedMetricTest, PrecisionRowSplit) {
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyPrecision,
DataSplitMode::kRow);
}
TEST_F(FederatedMetricTest, PrecisionColumnSplit) {
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyPrecision,
DataSplitMode::kCol);
}
TEST_F(FederatedMetricTest, NDCGRowSplit) {
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyNDCG, DataSplitMode::kRow);
}
TEST_F(FederatedMetricTest, NDCGColumnSplit) {
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyNDCG, DataSplitMode::kCol);
}
TEST_F(FederatedMetricTest, MAPRowSplit) {
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyMAP, DataSplitMode::kRow);
}
TEST_F(FederatedMetricTest, MAPColumnSplit) {
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyMAP, DataSplitMode::kCol);
}
TEST_F(FederatedMetricTest, NDCGExpGainRowSplit) {
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyNDCGExpGain,
DataSplitMode::kRow);
}
TEST_F(FederatedMetricTest, NDCGExpGainColumnSplit) {
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyNDCGExpGain,
DataSplitMode::kCol);
}
} // namespace metric
} // namespace xgboost
// Federated variants of the survival metric tests. They sit in namespace xgboost::common
// so that the unqualified VerifyAFTNegLogLik / VerifyIntervalRegressionAccuracy names
// resolve (NOTE(review): presumably both come from ../metric/test_survival_metric.h -- confirm).
// As above, each verifier is passed to RunWithFederatedCommunicator together with kWorldSize
// and the fixture's server address, once with DataSplitMode::kRow and once with
// DataSplitMode::kCol.
namespace xgboost {
namespace common {
// AFT negative log-likelihood.
TEST_F(FederatedMetricTest, AFTNegLogLikRowSplit) {
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyAFTNegLogLik,
DataSplitMode::kRow);
}
TEST_F(FederatedMetricTest, AFTNegLogLikColumnSplit) {
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyAFTNegLogLik,
DataSplitMode::kCol);
}
// Interval regression accuracy.
TEST_F(FederatedMetricTest, IntervalRegressionAccuracyRowSplit) {
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyIntervalRegressionAccuracy,
DataSplitMode::kRow);
}
TEST_F(FederatedMetricTest, IntervalRegressionAccuracyColumnSplit) {
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyIntervalRegressionAccuracy,
DataSplitMode::kCol);
}
} // namespace common
} // namespace xgboost