Implement categorical prediction for CPU and GPU predict leaf. (#7001)

* Categorical prediction with CPU predictor and GPU predict leaf.

* Implement categorical prediction for CPU prediction.
* Implement categorical prediction for GPU predict leaf.
* Refactor the prediction functions to have a unified get next node function.

Co-authored-by: Shvets Kirill <kirill.shvets@intel.com>
This commit is contained in:
Jiaming Yuan
2021-06-11 10:11:45 +08:00
committed by GitHub
parent 72f9daf9b6
commit f79cc4a7a4
10 changed files with 340 additions and 200 deletions

View File

@@ -229,9 +229,17 @@ void TestUpdatePredictionCache(bool use_subsampling) {
}
}
// Runs the shared TestCategoricalPrediction helper with the predictor name
// "cpu_predictor".
// NOTE(review): this suite is spelled `CPUPredictor`, while neighbouring tests
// in this file use `CpuPredictor` — confirm which spelling is intended, since
// it affects gtest name filters.
TEST(CPUPredictor, CategoricalPrediction) {
TestCategoricalPrediction("cpu_predictor");
}
// Runs the shared TestCategoricalPredictLeaf helper with the predictor name
// "cpu_predictor", wrapped explicitly in a StringView.
// NOTE(review): this suite is spelled `CPUPredictor`, while neighbouring tests
// in this file use `CpuPredictor` — confirm which spelling is intended.
TEST(CPUPredictor, CategoricalPredictLeaf) {
TestCategoricalPredictLeaf(StringView{"cpu_predictor"});
}
// Exercises TestUpdatePredictionCache with its `use_subsampling` flag off and
// then on.
// NOTE(review): the false/true pair appears twice below. This looks like
// residue from a rendered diff (removed and re-added lines both shown) rather
// than an intentional second run — confirm against the real file.
TEST(CpuPredictor, UpdatePredictionCache) {
TestUpdatePredictionCache(false);
TestUpdatePredictionCache(true);
TestUpdatePredictionCache(false);
TestUpdatePredictionCache(true);
}
TEST(CpuPredictor, LesserFeatures) {

View File

@@ -228,6 +228,10 @@ TEST(GPUPredictor, CategoricalPrediction) {
TestCategoricalPrediction("gpu_predictor");
}
// Runs the shared TestCategoricalPredictLeaf helper with the predictor name
// "gpu_predictor", wrapped explicitly in a StringView.
TEST(GPUPredictor, CategoricalPredictLeaf) {
TestCategoricalPredictLeaf(StringView{"gpu_predictor"});
}
TEST(GPUPredictor, PredictLeafBasic) {
size_t constexpr kRows = 5, kCols = 5;
auto dmat = RandomDataGenerator(kRows, kCols, 0).Device(0).GenerateDMatrix();

View File

@@ -180,6 +180,25 @@ void TestPredictionWithLesserFeatures(std::string predictor_name) {
#endif // defined(XGBOOST_USE_CUDA)
}
// Builds a single tree whose root is a categorical split and commits it to
// `model`.
//
// model        - receives the tree through CommitModel (second argument 0).
// split_ind    - feature index used by the root split.
// split_cat    - category whose bit is set in the split's category bit field.
// left_weight  - weight handed to ExpandCategorical for the left child.
// right_weight - weight handed to ExpandCategorical for the right child.
void GBTreeModelForTest(gbm::GBTreeModel *model, uint32_t split_ind,
                        bst_cat_t split_cat, float left_weight,
                        float right_weight) {
  std::vector<std::unique_ptr<RegTree>> trees;
  trees.push_back(std::unique_ptr<RegTree>(new RegTree));
  auto& p_tree = trees.front();

  // Setting bit `split_cat` requires room for `split_cat + 1` bits. Sizing the
  // storage with `split_cat` alone leaves no word for the bit when split_cat
  // is 0 or a multiple of 32 (assumes ComputeStorageSize takes a bit count and
  // rounds up to whole 32-bit words — confirm against bitfield.h).
  std::vector<uint32_t> split_cats(
      LBitField32::ComputeStorageSize(split_cat + 1));
  LBitField32 cats_bits(split_cats);
  cats_bits.Set(split_cat);

  // Expand node 0 into the categorical split; the remaining float literals are
  // fixed placeholder values for this test tree.
  p_tree->ExpandCategorical(0, split_ind, split_cats, true, 1.5f,
                            left_weight, right_weight,
                            3.0f, 2.2f, 7.0f, 9.0f);

  model->CommitModel(std::move(trees), 0);
}
void TestCategoricalPrediction(std::string name) {
size_t constexpr kCols = 10;
PredictionCacheEntry out_predictions;
@@ -189,25 +208,13 @@ void TestCategoricalPrediction(std::string name) {
param.num_output_group = 1;
param.base_score = 0.5;
gbm::GBTreeModel model(&param);
std::vector<std::unique_ptr<RegTree>> trees;
trees.push_back(std::unique_ptr<RegTree>(new RegTree));
auto& p_tree = trees.front();
uint32_t split_ind = 3;
bst_cat_t split_cat = 4;
float left_weight = 1.3f;
float right_weight = 1.7f;
std::vector<uint32_t> split_cats(LBitField32::ComputeStorageSize(split_cat));
LBitField32 cats_bits(split_cats);
cats_bits.Set(split_cat);
p_tree->ExpandCategorical(0, split_ind, split_cats, true, 1.5f,
left_weight, right_weight,
3.0f, 2.2f, 7.0f, 9.0f);
model.CommitModel(std::move(trees), 0);
gbm::GBTreeModel model(&param);
GBTreeModelForTest(&model, split_ind, split_cat, left_weight, right_weight);
GenericParameter runtime;
runtime.gpu_id = 0;
@@ -232,4 +239,43 @@ void TestCategoricalPrediction(std::string name) {
ASSERT_EQ(out_predictions.predictions.HostVector()[0],
left_weight + param.base_score);
}
// Checks the leaf index reported by PredictLeaf for a single row on the
// one-split categorical tree produced by GBTreeModelForTest, using the
// predictor created from `name`.
void TestCategoricalPredictLeaf(StringView name) {
  size_t constexpr kCols = 10;
  uint32_t const feature_idx = 3;
  bst_cat_t const category = 4;
  float const weight_left = 1.3f;
  float const weight_right = 1.7f;

  PredictionCacheEntry cache;
  auto& leaf_ids = cache.predictions;

  LearnerModelParam mparam;
  mparam.num_feature = kCols;
  mparam.num_output_group = 1;
  mparam.base_score = 0.5;

  gbm::GBTreeModel model(&mparam);
  GBTreeModelForTest(&model, feature_idx, category, weight_left, weight_right);

  GenericParameter ctx;
  ctx.gpu_id = 0;
  std::unique_ptr<Predictor> predictor{Predictor::Create(name.c_str(), &ctx)};

  // First case: the split feature holds exactly the split category.
  std::vector<float> sample(kCols);
  sample[feature_idx] = category;
  auto p_fmat = GetDMatrixFromData(sample, 1, kCols);
  predictor->PredictLeaf(p_fmat.get(), &leaf_ids, model);
  CHECK_EQ(leaf_ids.Size(), 1);
  // A matching category is routed right (node 2); anything else goes left.
  ASSERT_EQ(leaf_ids.HostVector()[0], 2);

  // Second case: a different category, so the row should land in node 1.
  sample[feature_idx] = category + 1;
  p_fmat = GetDMatrixFromData(sample, 1, kCols);
  cache.version = 0;
  predictor->InitOutPredictions(p_fmat->Info(), &leaf_ids, model);
  predictor->PredictLeaf(p_fmat.get(), &leaf_ids, model);
  ASSERT_EQ(leaf_ids.HostVector()[0], 1);
}
} // namespace xgboost

View File

@@ -66,6 +66,8 @@ void TestInplacePrediction(dmlc::any x, std::string predictor,
// Shared predictor test helpers; each takes the name of the predictor to test
// (the tests in this change pass "cpu_predictor" or "gpu_predictor").
void TestPredictionWithLesserFeatures(std::string predictor_name);
// Categorical-split prediction check for the named predictor.
void TestCategoricalPrediction(std::string name);
// Categorical-split leaf-index (PredictLeaf) check for the named predictor.
void TestCategoricalPredictLeaf(StringView name);
} // namespace xgboost
#endif // XGBOOST_TEST_PREDICTOR_H_