diff --git a/src/predictor/cpu_predictor.cc b/src/predictor/cpu_predictor.cc index 920e81dcd..26b5a85b6 100644 --- a/src/predictor/cpu_predictor.cc +++ b/src/predictor/cpu_predictor.cc @@ -430,8 +430,7 @@ class ColumnSplitHelper { << "column-split prediction is only supported for distributed training"; for (auto const &batch : p_fmat->GetBatches()) { - CHECK_EQ(out_preds->size(), - p_fmat->Info().num_row_ * model_.learner_model_param->num_output_group); + CHECK_EQ(out_preds->size(), p_fmat->Info().num_row_ * (tree_end_ - tree_begin_)); PredictBatchKernel(SparsePageView{&batch}, out_preds); } } @@ -543,8 +542,12 @@ class ColumnSplitHelper { for (size_t tree_id = tree_begin_; tree_id < tree_end_; ++tree_id) { auto const gid = model_.tree_info[tree_id]; for (size_t i = 0; i < block_size; ++i) { - preds[(predict_offset + i) * num_group + gid] += - PredictOneTree(tree_id, batch_offset + i); + auto const result = PredictOneTree(tree_id, batch_offset + i); + if constexpr (predict_leaf) { + preds[(predict_offset + i) * (tree_end_ - tree_begin_) + tree_id] = result; + } else { + preds[(predict_offset + i) * num_group + gid] += result; + } } } } @@ -645,6 +648,9 @@ class CPUPredictor : public Predictor { void PredictDMatrix(DMatrix *p_fmat, std::vector *out_preds, gbm::GBTreeModel const &model, int32_t tree_begin, int32_t tree_end) const { if (p_fmat->Info().IsColumnSplit()) { + CHECK(!model.learner_model_param->IsVectorLeaf()) + << "Predict DMatrix with column split" << MTNotImplemented(); + ColumnSplitHelper helper(this->ctx_->Threads(), model, tree_begin, tree_end); helper.PredictDMatrix(p_fmat, out_preds); return; @@ -743,6 +749,8 @@ class CPUPredictor : public Predictor { unsigned tree_end) const override { auto proxy = dynamic_cast(p_m.get()); CHECK(proxy)<< "Inplace predict accepts only DMatrixProxy as input."; + CHECK(!p_m->Info().IsColumnSplit()) + << "Inplace predict support for column-wise data split is not yet implemented."; auto x = proxy->Adapter(); if 
(x.type() == typeid(std::shared_ptr)) { this->DispatchedInplacePredict( @@ -773,6 +781,9 @@ class CPUPredictor : public Predictor { out_preds->resize(model.learner_model_param->num_output_group); if (is_column_split) { + CHECK(!model.learner_model_param->IsVectorLeaf()) + << "Predict instance with column split" << MTNotImplemented(); + ColumnSplitHelper helper(this->ctx_->Threads(), model, 0, ntree_limit); helper.PredictInstance(inst, out_preds); return; @@ -802,6 +813,9 @@ class CPUPredictor : public Predictor { preds.resize(info.num_row_ * ntree_limit); if (p_fmat->Info().IsColumnSplit()) { + CHECK(!model.learner_model_param->IsVectorLeaf()) + << "Predict leaf with column split" << MTNotImplemented(); + ColumnSplitHelper helper(n_threads, model, 0, ntree_limit); helper.PredictLeaf(p_fmat, &preds); return; diff --git a/tests/cpp/predictor/test_cpu_predictor.cc b/tests/cpp/predictor/test_cpu_predictor.cc index 279ba6118..d24a2f458 100644 --- a/tests/cpp/predictor/test_cpu_predictor.cc +++ b/tests/cpp/predictor/test_cpu_predictor.cc @@ -117,7 +117,7 @@ void TestColumnSplit() { } } // anonymous namespace -TEST(CpuPredictor, ColumnSplitBasic) { +TEST(CpuPredictor, BasicColumnSplit) { auto constexpr kWorldSize = 2; RunWithInMemoryCommunicator(kWorldSize, TestColumnSplit); } @@ -126,6 +126,10 @@ TEST(CpuPredictor, IterationRange) { TestIterationRange("cpu_predictor"); } +TEST(CpuPredictor, IterationRangeColumnSplit) { + TestIterationRangeColumnSplit("cpu_predictor"); +} + TEST(CpuPredictor, ExternalMemory) { size_t constexpr kPageSize = 64, kEntriesPerCol = 3; size_t constexpr kEntries = kPageSize * kEntriesPerCol * 2; @@ -223,10 +227,18 @@ TEST(CPUPredictor, CategoricalPrediction) { TestCategoricalPrediction("cpu_predictor"); } +TEST(CPUPredictor, CategoricalPredictionColumnSplit) { + TestCategoricalPredictionColumnSplit("cpu_predictor"); +} + TEST(CPUPredictor, CategoricalPredictLeaf) { TestCategoricalPredictLeaf(StringView{"cpu_predictor"}); } +TEST(CPUPredictor, 
CategoricalPredictLeafColumnSplit) { + TestCategoricalPredictLeafColumnSplit(StringView{"cpu_predictor"}); +} + TEST(CpuPredictor, UpdatePredictionCache) { TestUpdatePredictionCache(false); TestUpdatePredictionCache(true); @@ -236,11 +248,20 @@ TEST(CpuPredictor, LesserFeatures) { TestPredictionWithLesserFeatures("cpu_predictor"); } +TEST(CpuPredictor, LesserFeaturesColumnSplit) { + TestPredictionWithLesserFeaturesColumnSplit("cpu_predictor"); +} + TEST(CpuPredictor, Sparse) { TestSparsePrediction(0.2, "cpu_predictor"); TestSparsePrediction(0.8, "cpu_predictor"); } +TEST(CpuPredictor, SparseColumnSplit) { + TestSparsePredictionColumnSplit(0.2, "cpu_predictor"); + TestSparsePredictionColumnSplit(0.8, "cpu_predictor"); +} + TEST(CpuPredictor, Multi) { Context ctx; ctx.nthread = 1; diff --git a/tests/cpp/predictor/test_gpu_predictor.cu b/tests/cpp/predictor/test_gpu_predictor.cu index fecb5028a..a0d917ae8 100644 --- a/tests/cpp/predictor/test_gpu_predictor.cu +++ b/tests/cpp/predictor/test_gpu_predictor.cu @@ -209,7 +209,6 @@ TEST(GPUPredictor, IterationRange) { TestIterationRange("gpu_predictor"); } - TEST(GPUPredictor, CategoricalPrediction) { TestCategoricalPrediction("gpu_predictor"); } diff --git a/tests/cpp/predictor/test_predictor.cc b/tests/cpp/predictor/test_predictor.cc index 575a85497..fb7e7fb8a 100644 --- a/tests/cpp/predictor/test_predictor.cc +++ b/tests/cpp/predictor/test_predictor.cc @@ -153,28 +153,32 @@ void TestInplacePrediction(std::shared_ptr x, std::string predictor, bs learner->Configure(); } -void TestPredictionWithLesserFeatures(std::string predictor_name) { - size_t constexpr kRows = 256, kTrainCols = 256, kTestCols = 4, kIters = 4; - auto m_train = RandomDataGenerator(kRows, kTrainCols, 0.5).GenerateDMatrix(true); - auto m_test = RandomDataGenerator(kRows, kTestCols, 0.5).GenerateDMatrix(false); - std::unique_ptr learner{Learner::Create({m_train})}; - - for (size_t i = 0; i < kIters; ++i) { - learner->UpdateOneIter(i, m_train); +namespace { 
+std::unique_ptr LearnerForTest(std::shared_ptr dmat, size_t iters, + size_t forest = 1) { + std::unique_ptr learner{Learner::Create({dmat})}; + learner->SetParams(Args{{"num_parallel_tree", std::to_string(forest)}}); + for (size_t i = 0; i < iters; ++i) { + learner->UpdateOneIter(i, dmat); } + return learner; +} +void VerifyPredictionWithLesserFeatures(Learner *learner, std::string const &predictor_name, + size_t rows, std::shared_ptr const &m_test, + std::shared_ptr const &m_invalid) { HostDeviceVector prediction; learner->SetParam("predictor", predictor_name); learner->Configure(); Json config{Object()}; learner->SaveConfig(&config); - ASSERT_EQ(get(config["learner"]["gradient_booster"]["gbtree_train_param"]["predictor"]), predictor_name); + ASSERT_EQ(get(config["learner"]["gradient_booster"]["gbtree_train_param"]["predictor"]), + predictor_name); learner->Predict(m_test, false, &prediction, 0, 0); - ASSERT_EQ(prediction.Size(), kRows); + ASSERT_EQ(prediction.Size(), rows); - auto m_invalid = RandomDataGenerator(kRows, kTrainCols + 1, 0.5).GenerateDMatrix(false); - ASSERT_THROW({learner->Predict(m_invalid, false, &prediction, 0, 0);}, dmlc::Error); + ASSERT_THROW({ learner->Predict(m_invalid, false, &prediction, 0, 0); }, dmlc::Error); #if defined(XGBOOST_USE_CUDA) HostDeviceVector from_cpu; @@ -185,13 +189,49 @@ void TestPredictionWithLesserFeatures(std::string predictor_name) { learner->SetParam("predictor", "gpu_predictor"); learner->Predict(m_test, false, &from_cuda, 0, 0); - auto const& h_cpu = from_cpu.ConstHostVector(); - auto const& h_gpu = from_cuda.ConstHostVector(); + auto const &h_cpu = from_cpu.ConstHostVector(); + auto const &h_gpu = from_cuda.ConstHostVector(); for (size_t i = 0; i < h_cpu.size(); ++i) { ASSERT_NEAR(h_cpu[i], h_gpu[i], kRtEps); } #endif // defined(XGBOOST_USE_CUDA) } +} // anonymous namespace + +void TestPredictionWithLesserFeatures(std::string predictor_name) { + size_t constexpr kRows = 256, kTrainCols = 256, kTestCols = 4, 
kIters = 4; + auto m_train = RandomDataGenerator(kRows, kTrainCols, 0.5).GenerateDMatrix(true); + auto learner = LearnerForTest(m_train, kIters); + auto m_test = RandomDataGenerator(kRows, kTestCols, 0.5).GenerateDMatrix(false); + auto m_invalid = RandomDataGenerator(kRows, kTrainCols + 1, 0.5).GenerateDMatrix(false); + VerifyPredictionWithLesserFeatures(learner.get(), predictor_name, kRows, m_test, m_invalid); +} + +namespace { +void VerifyPredictionWithLesserFeaturesColumnSplit(Learner *learner, + std::string const &predictor_name, size_t rows, + std::shared_ptr m_test, + std::shared_ptr m_invalid) { + auto const world_size = collective::GetWorldSize(); + auto const rank = collective::GetRank(); + std::shared_ptr sliced_test{m_test->SliceCol(world_size, rank)}; + std::shared_ptr sliced_invalid{m_invalid->SliceCol(world_size, rank)}; + + VerifyPredictionWithLesserFeatures(learner, predictor_name, rows, sliced_test, sliced_invalid); +} +} // anonymous namespace + +void TestPredictionWithLesserFeaturesColumnSplit(std::string predictor_name) { + size_t constexpr kRows = 256, kTrainCols = 256, kTestCols = 4, kIters = 4; + auto m_train = RandomDataGenerator(kRows, kTrainCols, 0.5).GenerateDMatrix(true); + auto learner = LearnerForTest(m_train, kIters); + auto m_test = RandomDataGenerator(kRows, kTestCols, 0.5).GenerateDMatrix(false); + auto m_invalid = RandomDataGenerator(kRows, kTrainCols + 1, 0.5).GenerateDMatrix(false); + + auto constexpr kWorldSize = 2; + RunWithInMemoryCommunicator(kWorldSize, VerifyPredictionWithLesserFeaturesColumnSplit, + learner.get(), predictor_name, kRows, m_test, m_invalid); +} void GBTreeModelForTest(gbm::GBTreeModel *model, uint32_t split_ind, bst_cat_t split_cat, float left_weight, @@ -212,7 +252,7 @@ void GBTreeModelForTest(gbm::GBTreeModel *model, uint32_t split_ind, model->CommitModelGroup(std::move(trees), 0); } -void TestCategoricalPrediction(std::string name) { +void TestCategoricalPrediction(std::string name, bool is_column_split) 
{ size_t constexpr kCols = 10; PredictionCacheEntry out_predictions; @@ -236,6 +276,9 @@ void TestCategoricalPrediction(std::string name) { std::vector types(10, FeatureType::kCategorical); m->Info().feature_types.HostVector() = types; + if (is_column_split) { + m = std::shared_ptr{m->SliceCol(collective::GetWorldSize(), collective::GetRank())}; + } predictor->InitOutPredictions(m->Info(), &out_predictions.predictions, model); predictor->PredictBatch(m.get(), &out_predictions, model, 0); @@ -246,13 +289,21 @@ void TestCategoricalPrediction(std::string name) { row[split_ind] = split_cat + 1; m = GetDMatrixFromData(row, 1, kCols); + if (is_column_split) { + m = std::shared_ptr{m->SliceCol(collective::GetWorldSize(), collective::GetRank())}; + } out_predictions.version = 0; predictor->InitOutPredictions(m->Info(), &out_predictions.predictions, model); predictor->PredictBatch(m.get(), &out_predictions, model, 0); ASSERT_EQ(out_predictions.predictions.HostVector()[0], left_weight + score); } -void TestCategoricalPredictLeaf(StringView name) { +void TestCategoricalPredictionColumnSplit(std::string name) { + auto constexpr kWorldSize = 2; + RunWithInMemoryCommunicator(kWorldSize, TestCategoricalPrediction, name, true); +} + +void TestCategoricalPredictLeaf(StringView name, bool is_column_split) { size_t constexpr kCols = 10; PredictionCacheEntry out_predictions; @@ -275,6 +326,9 @@ void TestCategoricalPredictLeaf(StringView name) { std::vector row(kCols); row[split_ind] = split_cat; auto m = GetDMatrixFromData(row, 1, kCols); + if (is_column_split) { + m = std::shared_ptr{m->SliceCol(collective::GetWorldSize(), collective::GetRank())}; + } predictor->PredictLeaf(m.get(), &out_predictions.predictions, model); CHECK_EQ(out_predictions.predictions.Size(), 1); @@ -283,25 +337,25 @@ void TestCategoricalPredictLeaf(StringView name) { row[split_ind] = split_cat + 1; m = GetDMatrixFromData(row, 1, kCols); + if (is_column_split) { + m = 
std::shared_ptr{m->SliceCol(collective::GetWorldSize(), collective::GetRank())}; + } out_predictions.version = 0; predictor->InitOutPredictions(m->Info(), &out_predictions.predictions, model); predictor->PredictLeaf(m.get(), &out_predictions.predictions, model); ASSERT_EQ(out_predictions.predictions.HostVector()[0], 1); } +void TestCategoricalPredictLeafColumnSplit(StringView name) { + auto constexpr kWorldSize = 2; + RunWithInMemoryCommunicator(kWorldSize, TestCategoricalPredictLeaf, name, true); +} void TestIterationRange(std::string name) { - size_t constexpr kRows = 1000, kCols = 20, kClasses = 4, kForest = 3; + size_t constexpr kRows = 1000, kCols = 20, kClasses = 4, kForest = 3, kIters = 10; auto dmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix(true, true, kClasses); - std::unique_ptr learner{Learner::Create({dmat})}; - - learner->SetParams(Args{{"num_parallel_tree", std::to_string(kForest)}, - {"predictor", name}}); - - size_t kIters = 10; - for (size_t i = 0; i < kIters; ++i) { - learner->UpdateOneIter(i, dmat); - } + auto learner = LearnerForTest(dmat, kIters, kForest); + learner->SetParams(Args{{"predictor", name}}); bool bound = false; std::unique_ptr sliced {learner->Slice(0, 3, 1, &bound)}; @@ -363,15 +417,82 @@ void TestIterationRange(std::string name) { } } -void TestSparsePrediction(float sparsity, std::string predictor) { - size_t constexpr kRows = 512, kCols = 128; - auto Xy = RandomDataGenerator(kRows, kCols, sparsity).GenerateDMatrix(true); - std::unique_ptr learner{Learner::Create({Xy})}; - learner->Configure(); - for (size_t i = 0; i < 4; ++i) { - learner->UpdateOneIter(i, Xy); +namespace { +void VerifyIterationRangeColumnSplit(DMatrix *dmat, Learner *learner, Learner *sliced, + std::vector const &expected_margin_ranged, + std::vector const &expected_margin_sliced, + std::vector const &expected_leaf_ranged, + std::vector const &expected_leaf_sliced) { + auto const world_size = collective::GetWorldSize(); + auto const rank = 
collective::GetRank(); + std::shared_ptr Xy{dmat->SliceCol(world_size, rank)}; + + HostDeviceVector out_predt_sliced; + HostDeviceVector out_predt_ranged; + + // margin + { + sliced->Predict(Xy, true, &out_predt_sliced, 0, 0, false, false, false, false, false); + learner->Predict(Xy, true, &out_predt_ranged, 0, 3, false, false, false, false, false); + auto const &h_sliced = out_predt_sliced.HostVector(); + auto const &h_range = out_predt_ranged.HostVector(); + ASSERT_EQ(h_sliced.size(), expected_margin_sliced.size()); + ASSERT_EQ(h_sliced, expected_margin_sliced); + ASSERT_EQ(h_range.size(), expected_margin_ranged.size()); + ASSERT_EQ(h_range, expected_margin_ranged); } + // Leaf + { + sliced->Predict(Xy, false, &out_predt_sliced, 0, 0, false, true, false, false, false); + learner->Predict(Xy, false, &out_predt_ranged, 0, 3, false, true, false, false, false); + auto const &h_sliced = out_predt_sliced.HostVector(); + auto const &h_range = out_predt_ranged.HostVector(); + ASSERT_EQ(h_sliced.size(), expected_leaf_sliced.size()); + ASSERT_EQ(h_sliced, expected_leaf_sliced); + ASSERT_EQ(h_range.size(), expected_leaf_ranged.size()); + ASSERT_EQ(h_range, expected_leaf_ranged); + } +} +} // anonymous namespace + +void TestIterationRangeColumnSplit(std::string name) { + size_t constexpr kRows = 1000, kCols = 20, kClasses = 4, kForest = 3, kIters = 10; + auto dmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix(true, true, kClasses); + auto learner = LearnerForTest(dmat, kIters, kForest); + learner->SetParams(Args{{"predictor", name}}); + + bool bound = false; + std::unique_ptr sliced{learner->Slice(0, 3, 1, &bound)}; + ASSERT_FALSE(bound); + + // margin + HostDeviceVector margin_predt_sliced; + HostDeviceVector margin_predt_ranged; + sliced->Predict(dmat, true, &margin_predt_sliced, 0, 0, false, false, false, false, false); + learner->Predict(dmat, true, &margin_predt_ranged, 0, 3, false, false, false, false, false); + auto const &margin_sliced = 
margin_predt_sliced.HostVector(); + auto const &margin_ranged = margin_predt_ranged.HostVector(); + + // Leaf + HostDeviceVector leaf_predt_sliced; + HostDeviceVector leaf_predt_ranged; + sliced->Predict(dmat, false, &leaf_predt_sliced, 0, 0, false, true, false, false, false); + learner->Predict(dmat, false, &leaf_predt_ranged, 0, 3, false, true, false, false, false); + auto const &leaf_sliced = leaf_predt_sliced.HostVector(); + auto const &leaf_ranged = leaf_predt_ranged.HostVector(); + + auto constexpr kWorldSize = 2; + RunWithInMemoryCommunicator(kWorldSize, VerifyIterationRangeColumnSplit, dmat.get(), + learner.get(), sliced.get(), margin_ranged, margin_sliced, + leaf_ranged, leaf_sliced); +} + +void TestSparsePrediction(float sparsity, std::string predictor) { + size_t constexpr kRows = 512, kCols = 128, kIters = 4; + auto Xy = RandomDataGenerator(kRows, kCols, sparsity).GenerateDMatrix(true); + auto learner = LearnerForTest(Xy, kIters); + HostDeviceVector sparse_predt; Json model{Object{}}; @@ -419,6 +540,43 @@ void TestSparsePrediction(float sparsity, std::string predictor) { } } +namespace { +void VerifySparsePredictionColumnSplit(DMatrix *dmat, Learner *learner, + std::vector const &expected_predt) { + std::shared_ptr sliced{ + dmat->SliceCol(collective::GetWorldSize(), collective::GetRank())}; + HostDeviceVector sparse_predt; + learner->Predict(sliced, false, &sparse_predt, 0, 0); + + auto const &predt = sparse_predt.HostVector(); + ASSERT_EQ(predt.size(), expected_predt.size()); + for (size_t i = 0; i < predt.size(); ++i) { + ASSERT_FLOAT_EQ(predt[i], expected_predt[i]); + } +} +} // anonymous namespace + +void TestSparsePredictionColumnSplit(float sparsity, std::string predictor) { + size_t constexpr kRows = 512, kCols = 128, kIters = 4; + auto Xy = RandomDataGenerator(kRows, kCols, sparsity).GenerateDMatrix(true); + auto learner = LearnerForTest(Xy, kIters); + + HostDeviceVector sparse_predt; + + Json model{Object{}}; + learner->SaveModel(&model); + + 
learner.reset(Learner::Create({Xy})); + learner->LoadModel(model); + + learner->SetParam("predictor", predictor); + learner->Predict(Xy, false, &sparse_predt, 0, 0); + + auto constexpr kWorldSize = 2; + RunWithInMemoryCommunicator(kWorldSize, VerifySparsePredictionColumnSplit, Xy.get(), + learner.get(), sparse_predt.HostVector()); +} + void TestVectorLeafPrediction(Context const *ctx) { std::unique_ptr cpu_predictor = std::unique_ptr(Predictor::Create("cpu_predictor", ctx)); diff --git a/tests/cpp/predictor/test_predictor.h b/tests/cpp/predictor/test_predictor.h index 302c6bfae..e02f5f578 100644 --- a/tests/cpp/predictor/test_predictor.h +++ b/tests/cpp/predictor/test_predictor.h @@ -86,14 +86,24 @@ void TestInplacePrediction(std::shared_ptr x, std::string predictor, bs void TestPredictionWithLesserFeatures(std::string preditor_name); -void TestCategoricalPrediction(std::string name); +void TestPredictionWithLesserFeaturesColumnSplit(std::string predictor_name); -void TestCategoricalPredictLeaf(StringView name); +void TestCategoricalPrediction(std::string name, bool is_column_split = false); + +void TestCategoricalPredictionColumnSplit(std::string name); + +void TestCategoricalPredictLeaf(StringView name, bool is_column_split = false); + +void TestCategoricalPredictLeafColumnSplit(StringView name); void TestIterationRange(std::string name); +void TestIterationRangeColumnSplit(std::string name); + void TestSparsePrediction(float sparsity, std::string predictor); +void TestSparsePredictionColumnSplit(float sparsity, std::string predictor); + void TestVectorLeafPrediction(Context const* ctx); } // namespace xgboost