sync Jun 5
This commit is contained in:
@@ -17,34 +17,30 @@
|
||||
#include "xgboost/host_device_vector.h" // for HostDeviceVector
|
||||
#include "xgboost/json.h" // for Json, String, Object
|
||||
|
||||
namespace xgboost {
|
||||
namespace metric {
|
||||
namespace xgboost::metric {
|
||||
|
||||
inline void VerifyPrecision(DataSplitMode data_split_mode = DataSplitMode::kRow) {
|
||||
// When the limit for precision is not given, it takes the limit at
|
||||
// std::numeric_limits<unsigned>::max(); hence all values are very small
|
||||
// NOTE(AbdealiJK): Maybe this should be fixed to be num_row by default.
|
||||
auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX);
|
||||
xgboost::Metric * metric = xgboost::Metric::Create("pre", &ctx);
|
||||
std::unique_ptr<xgboost::Metric> metric{Metric::Create("pre", &ctx)};
|
||||
ASSERT_STREQ(metric->Name(), "pre");
|
||||
EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}, {}, {}, data_split_mode), 0, 1e-7);
|
||||
EXPECT_NEAR(GetMetricEval(metric,
|
||||
{0.1f, 0.9f, 0.1f, 0.9f},
|
||||
{ 0, 0, 1, 1}, {}, {}, data_split_mode),
|
||||
0, 1e-7);
|
||||
EXPECT_NEAR(GetMetricEval(metric.get(), {0, 1}, {0, 1}, {}, {}, data_split_mode), 0.5, 1e-7);
|
||||
EXPECT_NEAR(
|
||||
GetMetricEval(metric.get(), {0.1f, 0.9f, 0.1f, 0.9f}, {0, 0, 1, 1}, {}, {}, data_split_mode),
|
||||
0.5, 1e-7);
|
||||
|
||||
delete metric;
|
||||
metric = xgboost::Metric::Create("pre@2", &ctx);
|
||||
metric.reset(xgboost::Metric::Create("pre@2", &ctx));
|
||||
ASSERT_STREQ(metric->Name(), "pre@2");
|
||||
EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}, {}, {}, data_split_mode), 0.5f, 1e-7);
|
||||
EXPECT_NEAR(GetMetricEval(metric,
|
||||
{0.1f, 0.9f, 0.1f, 0.9f},
|
||||
{ 0, 0, 1, 1}, {}, {}, data_split_mode),
|
||||
0.5f, 0.001f);
|
||||
EXPECT_NEAR(GetMetricEval(metric.get(), {0, 1}, {0, 1}, {}, {}, data_split_mode), 0.5f, 1e-7);
|
||||
EXPECT_NEAR(
|
||||
GetMetricEval(metric.get(), {0.1f, 0.9f, 0.1f, 0.9f}, {0, 0, 1, 1}, {}, {}, data_split_mode),
|
||||
0.5f, 0.001f);
|
||||
|
||||
EXPECT_ANY_THROW(GetMetricEval(metric, {0, 1}, {}, {}, {}, data_split_mode));
|
||||
EXPECT_ANY_THROW(GetMetricEval(metric.get(), {0, 1}, {}, {}, {}, data_split_mode));
|
||||
|
||||
delete metric;
|
||||
metric.reset(xgboost::Metric::Create("pre@4", &ctx));
|
||||
EXPECT_NEAR(GetMetricEval(metric.get(), {0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f},
|
||||
{0.0f, 1.0f, 0.0f, 0.0f, 1.0f, 1.0f}, {}, {}, data_split_mode),
|
||||
0.5f, 1e-7);
|
||||
}
|
||||
|
||||
inline void VerifyNDCG(DataSplitMode data_split_mode = DataSplitMode::kRow) {
|
||||
@@ -187,5 +183,4 @@ inline void VerifyNDCGExpGain(DataSplitMode data_split_mode = DataSplitMode::kRo
|
||||
ndcg = metric->Evaluate(predt, p_fmat);
|
||||
ASSERT_NEAR(ndcg, 1.0, kRtEps);
|
||||
}
|
||||
} // namespace metric
|
||||
} // namespace xgboost
|
||||
} // namespace xgboost::metric
|
||||
|
||||
@@ -17,13 +17,15 @@
|
||||
#include "test_predictor.h"
|
||||
|
||||
namespace xgboost {
|
||||
TEST(CpuPredictor, Basic) {
|
||||
|
||||
namespace {
|
||||
void TestBasic(DMatrix* dmat) {
|
||||
auto lparam = CreateEmptyGenericParam(GPUIDX);
|
||||
std::unique_ptr<Predictor> cpu_predictor =
|
||||
std::unique_ptr<Predictor>(Predictor::Create("cpu_predictor", &lparam));
|
||||
|
||||
size_t constexpr kRows = 5;
|
||||
size_t constexpr kCols = 5;
|
||||
size_t const kRows = dmat->Info().num_row_;
|
||||
size_t const kCols = dmat->Info().num_col_;
|
||||
|
||||
LearnerModelParam mparam{MakeMP(kCols, .0, 1)};
|
||||
|
||||
@@ -31,12 +33,10 @@ TEST(CpuPredictor, Basic) {
|
||||
ctx.UpdateAllowUnknown(Args{});
|
||||
gbm::GBTreeModel model = CreateTestModel(&mparam, &ctx);
|
||||
|
||||
auto dmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix();
|
||||
|
||||
// Test predict batch
|
||||
PredictionCacheEntry out_predictions;
|
||||
cpu_predictor->InitOutPredictions(dmat->Info(), &out_predictions.predictions, model);
|
||||
cpu_predictor->PredictBatch(dmat.get(), &out_predictions, model, 0);
|
||||
cpu_predictor->PredictBatch(dmat, &out_predictions, model, 0);
|
||||
|
||||
std::vector<float>& out_predictions_h = out_predictions.predictions.HostVector();
|
||||
for (size_t i = 0; i < out_predictions.predictions.Size(); i++) {
|
||||
@@ -44,26 +44,32 @@ TEST(CpuPredictor, Basic) {
|
||||
}
|
||||
|
||||
// Test predict instance
|
||||
auto const &batch = *dmat->GetBatches<xgboost::SparsePage>().begin();
|
||||
auto const& batch = *dmat->GetBatches<xgboost::SparsePage>().begin();
|
||||
auto page = batch.GetView();
|
||||
for (size_t i = 0; i < batch.Size(); i++) {
|
||||
std::vector<float> instance_out_predictions;
|
||||
cpu_predictor->PredictInstance(page[i], &instance_out_predictions, model);
|
||||
cpu_predictor->PredictInstance(page[i], &instance_out_predictions, model, 0,
|
||||
dmat->Info().IsColumnSplit());
|
||||
ASSERT_EQ(instance_out_predictions[0], 1.5);
|
||||
}
|
||||
|
||||
// Test predict leaf
|
||||
HostDeviceVector<float> leaf_out_predictions;
|
||||
cpu_predictor->PredictLeaf(dmat.get(), &leaf_out_predictions, model);
|
||||
cpu_predictor->PredictLeaf(dmat, &leaf_out_predictions, model);
|
||||
auto const& h_leaf_out_predictions = leaf_out_predictions.ConstHostVector();
|
||||
for (auto v : h_leaf_out_predictions) {
|
||||
ASSERT_EQ(v, 0);
|
||||
}
|
||||
|
||||
if (dmat->Info().IsColumnSplit()) {
|
||||
// Predict contribution is not supported for column split.
|
||||
return;
|
||||
}
|
||||
|
||||
// Test predict contribution
|
||||
HostDeviceVector<float> out_contribution_hdv;
|
||||
auto& out_contribution = out_contribution_hdv.HostVector();
|
||||
cpu_predictor->PredictContribution(dmat.get(), &out_contribution_hdv, model);
|
||||
cpu_predictor->PredictContribution(dmat, &out_contribution_hdv, model);
|
||||
ASSERT_EQ(out_contribution.size(), kRows * (kCols + 1));
|
||||
for (size_t i = 0; i < out_contribution.size(); ++i) {
|
||||
auto const& contri = out_contribution[i];
|
||||
@@ -76,8 +82,7 @@ TEST(CpuPredictor, Basic) {
|
||||
}
|
||||
}
|
||||
// Test predict contribution (approximate method)
|
||||
cpu_predictor->PredictContribution(dmat.get(), &out_contribution_hdv, model,
|
||||
0, nullptr, true);
|
||||
cpu_predictor->PredictContribution(dmat, &out_contribution_hdv, model, 0, nullptr, true);
|
||||
for (size_t i = 0; i < out_contribution.size(); ++i) {
|
||||
auto const& contri = out_contribution[i];
|
||||
// shift 1 for bias, as test tree is a decision dump, only global bias is
|
||||
@@ -89,41 +94,32 @@ TEST(CpuPredictor, Basic) {
|
||||
}
|
||||
}
|
||||
}
|
||||
} // anonymous namespace
|
||||
|
||||
namespace {
|
||||
void TestColumnSplitPredictBatch() {
|
||||
TEST(CpuPredictor, Basic) {
|
||||
size_t constexpr kRows = 5;
|
||||
size_t constexpr kCols = 5;
|
||||
auto dmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix();
|
||||
TestBasic(dmat.get());
|
||||
}
|
||||
|
||||
namespace {
|
||||
void TestColumnSplit() {
|
||||
size_t constexpr kRows = 5;
|
||||
size_t constexpr kCols = 5;
|
||||
auto dmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix();
|
||||
|
||||
auto const world_size = collective::GetWorldSize();
|
||||
auto const rank = collective::GetRank();
|
||||
dmat = std::unique_ptr<DMatrix>{dmat->SliceCol(world_size, rank)};
|
||||
|
||||
auto lparam = CreateEmptyGenericParam(GPUIDX);
|
||||
std::unique_ptr<Predictor> cpu_predictor =
|
||||
std::unique_ptr<Predictor>(Predictor::Create("cpu_predictor", &lparam));
|
||||
|
||||
LearnerModelParam mparam{MakeMP(kCols, .0, 1)};
|
||||
|
||||
Context ctx;
|
||||
ctx.UpdateAllowUnknown(Args{});
|
||||
gbm::GBTreeModel model = CreateTestModel(&mparam, &ctx);
|
||||
|
||||
// Test predict batch
|
||||
PredictionCacheEntry out_predictions;
|
||||
cpu_predictor->InitOutPredictions(dmat->Info(), &out_predictions.predictions, model);
|
||||
auto sliced = std::unique_ptr<DMatrix>{dmat->SliceCol(world_size, rank)};
|
||||
cpu_predictor->PredictBatch(sliced.get(), &out_predictions, model, 0);
|
||||
|
||||
std::vector<float>& out_predictions_h = out_predictions.predictions.HostVector();
|
||||
for (size_t i = 0; i < out_predictions.predictions.Size(); i++) {
|
||||
ASSERT_EQ(out_predictions_h[i], 1.5);
|
||||
}
|
||||
TestBasic(dmat.get());
|
||||
}
|
||||
} // anonymous namespace
|
||||
|
||||
TEST(CpuPredictor, ColumnSplit) {
|
||||
TEST(CpuPredictor, ColumnSplitBasic) {
|
||||
auto constexpr kWorldSize = 2;
|
||||
RunWithInMemoryCommunicator(kWorldSize, TestColumnSplitPredictBatch);
|
||||
RunWithInMemoryCommunicator(kWorldSize, TestColumnSplit);
|
||||
}
|
||||
|
||||
TEST(CpuPredictor, IterationRange) {
|
||||
@@ -133,69 +129,8 @@ TEST(CpuPredictor, IterationRange) {
|
||||
TEST(CpuPredictor, ExternalMemory) {
|
||||
size_t constexpr kPageSize = 64, kEntriesPerCol = 3;
|
||||
size_t constexpr kEntries = kPageSize * kEntriesPerCol * 2;
|
||||
|
||||
std::unique_ptr<DMatrix> dmat = CreateSparsePageDMatrix(kEntries);
|
||||
auto lparam = CreateEmptyGenericParam(GPUIDX);
|
||||
|
||||
std::unique_ptr<Predictor> cpu_predictor =
|
||||
std::unique_ptr<Predictor>(Predictor::Create("cpu_predictor", &lparam));
|
||||
|
||||
LearnerModelParam mparam{MakeMP(dmat->Info().num_col_, .0, 1)};
|
||||
|
||||
Context ctx;
|
||||
ctx.UpdateAllowUnknown(Args{});
|
||||
gbm::GBTreeModel model = CreateTestModel(&mparam, &ctx);
|
||||
|
||||
// Test predict batch
|
||||
PredictionCacheEntry out_predictions;
|
||||
cpu_predictor->InitOutPredictions(dmat->Info(), &out_predictions.predictions, model);
|
||||
cpu_predictor->PredictBatch(dmat.get(), &out_predictions, model, 0);
|
||||
std::vector<float> &out_predictions_h = out_predictions.predictions.HostVector();
|
||||
ASSERT_EQ(out_predictions.predictions.Size(), dmat->Info().num_row_);
|
||||
for (const auto& v : out_predictions_h) {
|
||||
ASSERT_EQ(v, 1.5);
|
||||
}
|
||||
|
||||
// Test predict leaf
|
||||
HostDeviceVector<float> leaf_out_predictions;
|
||||
cpu_predictor->PredictLeaf(dmat.get(), &leaf_out_predictions, model);
|
||||
auto const& h_leaf_out_predictions = leaf_out_predictions.ConstHostVector();
|
||||
ASSERT_EQ(h_leaf_out_predictions.size(), dmat->Info().num_row_);
|
||||
for (const auto& v : h_leaf_out_predictions) {
|
||||
ASSERT_EQ(v, 0);
|
||||
}
|
||||
|
||||
// Test predict contribution
|
||||
HostDeviceVector<float> out_contribution_hdv;
|
||||
auto& out_contribution = out_contribution_hdv.HostVector();
|
||||
cpu_predictor->PredictContribution(dmat.get(), &out_contribution_hdv, model);
|
||||
ASSERT_EQ(out_contribution.size(), dmat->Info().num_row_ * (dmat->Info().num_col_ + 1));
|
||||
for (size_t i = 0; i < out_contribution.size(); ++i) {
|
||||
auto const& contri = out_contribution[i];
|
||||
// shift 1 for bias, as test tree is a decision dump, only global bias is filled with LeafValue().
|
||||
if ((i + 1) % (dmat->Info().num_col_ + 1) == 0) {
|
||||
ASSERT_EQ(out_contribution.back(), 1.5f);
|
||||
} else {
|
||||
ASSERT_EQ(contri, 0);
|
||||
}
|
||||
}
|
||||
|
||||
// Test predict contribution (approximate method)
|
||||
HostDeviceVector<float> out_contribution_approximate_hdv;
|
||||
auto& out_contribution_approximate = out_contribution_approximate_hdv.HostVector();
|
||||
cpu_predictor->PredictContribution(
|
||||
dmat.get(), &out_contribution_approximate_hdv, model, 0, nullptr, true);
|
||||
ASSERT_EQ(out_contribution_approximate.size(),
|
||||
dmat->Info().num_row_ * (dmat->Info().num_col_ + 1));
|
||||
for (size_t i = 0; i < out_contribution.size(); ++i) {
|
||||
auto const& contri = out_contribution[i];
|
||||
// shift 1 for bias, as test tree is a decision dump, only global bias is filled with LeafValue().
|
||||
if ((i + 1) % (dmat->Info().num_col_ + 1) == 0) {
|
||||
ASSERT_EQ(out_contribution.back(), 1.5f);
|
||||
} else {
|
||||
ASSERT_EQ(contri, 0);
|
||||
}
|
||||
}
|
||||
TestBasic(dmat.get());
|
||||
}
|
||||
|
||||
TEST(CpuPredictor, InplacePredict) {
|
||||
|
||||
Reference in New Issue
Block a user