Calculate base_score based on input labels for mae. (#8107)

Fit an intercept as the base score for the absolute error (MAE) loss.
This commit is contained in:
Jiaming Yuan
2022-09-20 20:53:54 +08:00
committed by GitHub
parent 4f42aa5f12
commit fffb1fca52
42 changed files with 999 additions and 343 deletions

View File

@@ -21,14 +21,11 @@ TEST(CpuPredictor, Basic) {
size_t constexpr kRows = 5;
size_t constexpr kCols = 5;
LearnerModelParam param;
param.num_feature = kCols;
param.base_score = 0.0;
param.num_output_group = 1;
LearnerModelParam mparam{MakeMP(kCols, .0, 1)};
GenericParameter ctx;
ctx.UpdateAllowUnknown(Args{});
gbm::GBTreeModel model = CreateTestModel(&param, &ctx);
gbm::GBTreeModel model = CreateTestModel(&mparam, &ctx);
auto dmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix();
@@ -104,14 +101,11 @@ TEST(CpuPredictor, ExternalMemory) {
std::unique_ptr<Predictor> cpu_predictor =
std::unique_ptr<Predictor>(Predictor::Create("cpu_predictor", &lparam));
LearnerModelParam param;
param.base_score = 0;
param.num_feature = dmat->Info().num_col_;
param.num_output_group = 1;
LearnerModelParam mparam{MakeMP(dmat->Info().num_col_, .0, 1)};
GenericParameter ctx;
ctx.UpdateAllowUnknown(Args{});
gbm::GBTreeModel model = CreateTestModel(&param, &ctx);
gbm::GBTreeModel model = CreateTestModel(&mparam, &ctx);
// Test predict batch
PredictionCacheEntry out_predictions;
@@ -201,16 +195,11 @@ TEST(CpuPredictor, InplacePredict) {
void TestUpdatePredictionCache(bool use_subsampling) {
size_t constexpr kRows = 64, kCols = 16, kClasses = 4;
LearnerModelParam mparam;
mparam.num_feature = kCols;
mparam.num_output_group = kClasses;
mparam.base_score = 0;
GenericParameter gparam;
gparam.Init(Args{});
LearnerModelParam mparam{MakeMP(kCols, .0, kClasses)};
Context ctx;
std::unique_ptr<gbm::GBTree> gbm;
gbm.reset(static_cast<gbm::GBTree*>(GradientBooster::Create("gbtree", &gparam, &mparam)));
gbm.reset(static_cast<gbm::GBTree*>(GradientBooster::Create("gbtree", &ctx, &mparam)));
std::map<std::string, std::string> cfg;
cfg["tree_method"] = "hist";
cfg["predictor"] = "cpu_predictor";

View File

@@ -1,5 +1,5 @@
/*!
* Copyright 2017-2020 XGBoost contributors
* Copyright 2017-2022 XGBoost contributors
*/
#include <gtest/gtest.h>
#include <xgboost/c_api.h>
@@ -34,14 +34,10 @@ TEST(GPUPredictor, Basic) {
int n_row = i, n_col = i;
auto dmat = RandomDataGenerator(n_row, n_col, 0).GenerateDMatrix();
LearnerModelParam param;
param.num_feature = n_col;
param.num_output_group = 1;
param.base_score = 0.5;
GenericParameter ctx;
ctx.UpdateAllowUnknown(Args{});
gbm::GBTreeModel model = CreateTestModel(&param, &ctx);
Context ctx;
ctx.gpu_id = 0;
LearnerModelParam mparam{MakeMP(n_col, .5, 1, ctx.gpu_id)};
gbm::GBTreeModel model = CreateTestModel(&mparam, &ctx);
// Test predict batch
PredictionCacheEntry gpu_out_predictions;
@@ -93,15 +89,12 @@ TEST(GPUPredictor, ExternalMemoryTest) {
std::unique_ptr<Predictor>(Predictor::Create("gpu_predictor", &lparam));
gpu_predictor->Configure({});
LearnerModelParam param;
param.num_feature = 5;
const int n_classes = 3;
param.num_output_group = n_classes;
param.base_score = 0.5;
Context ctx;
ctx.gpu_id = 0;
LearnerModelParam mparam{MakeMP(5, .5, n_classes, ctx.gpu_id)};
GenericParameter ctx;
ctx.UpdateAllowUnknown(Args{});
gbm::GBTreeModel model = CreateTestModel(&param, &ctx, n_classes);
gbm::GBTreeModel model = CreateTestModel(&mparam, &ctx, n_classes);
std::vector<std::unique_ptr<DMatrix>> dmats;
dmats.push_back(CreateSparsePageDMatrix(400));
@@ -171,15 +164,10 @@ TEST(GpuPredictor, LesserFeatures) {
TEST(GPUPredictor, ShapStump) {
cudaSetDevice(0);
LearnerModelParam param;
param.num_feature = 1;
param.num_output_group = 1;
param.base_score = 0.5;
GenericParameter ctx;
ctx.UpdateAllowUnknown(Args{});
gbm::GBTreeModel model(&param, &ctx);
Context ctx;
ctx.gpu_id = 0;
LearnerModelParam mparam{MakeMP(1, .5, 1, ctx.gpu_id)};
gbm::GBTreeModel model(&mparam, &ctx);
std::vector<std::unique_ptr<RegTree>> trees;
trees.push_back(std::unique_ptr<RegTree>(new RegTree));
@@ -193,24 +181,20 @@ TEST(GPUPredictor, ShapStump) {
auto dmat = RandomDataGenerator(3, 1, 0).GenerateDMatrix();
gpu_predictor->PredictContribution(dmat.get(), &predictions, model);
auto& phis = predictions.HostVector();
auto base_score = mparam.BaseScore(Context::kCpuId)(0);
EXPECT_EQ(phis[0], 0.0);
EXPECT_EQ(phis[1], param.base_score);
EXPECT_EQ(phis[1], base_score);
EXPECT_EQ(phis[2], 0.0);
EXPECT_EQ(phis[3], param.base_score);
EXPECT_EQ(phis[3], base_score);
EXPECT_EQ(phis[4], 0.0);
EXPECT_EQ(phis[5], param.base_score);
EXPECT_EQ(phis[5], base_score);
}
TEST(GPUPredictor, Shap) {
LearnerModelParam param;
param.num_feature = 1;
param.num_output_group = 1;
param.base_score = 0.5;
GenericParameter ctx;
ctx.UpdateAllowUnknown(Args{});
gbm::GBTreeModel model(&param, &ctx);
Context ctx;
ctx.gpu_id = 0;
LearnerModelParam mparam{MakeMP(1, .5, 1, ctx.gpu_id)};
gbm::GBTreeModel model(&mparam, &ctx);
std::vector<std::unique_ptr<RegTree>> trees;
trees.push_back(std::unique_ptr<RegTree>(new RegTree));
@@ -258,14 +242,9 @@ TEST(GPUPredictor, PredictLeafBasic) {
std::unique_ptr<Predictor>(Predictor::Create("gpu_predictor", &lparam));
gpu_predictor->Configure({});
LearnerModelParam param;
param.num_feature = kCols;
param.base_score = 0.0;
param.num_output_group = 1;
GenericParameter ctx;
ctx.UpdateAllowUnknown(Args{});
gbm::GBTreeModel model = CreateTestModel(&param, &ctx);
LearnerModelParam mparam{MakeMP(kCols, .0, 1)};
Context ctx;
gbm::GBTreeModel model = CreateTestModel(&mparam, &ctx);
HostDeviceVector<float> leaf_out_predictions;
gpu_predictor->PredictLeaf(dmat.get(), &leaf_out_predictions, model);

View File

@@ -210,11 +210,7 @@ void TestCategoricalPrediction(std::string name) {
size_t constexpr kCols = 10;
PredictionCacheEntry out_predictions;
LearnerModelParam param;
param.num_feature = kCols;
param.num_output_group = 1;
param.base_score = 0.5;
LearnerModelParam mparam{MakeMP(kCols, .5, 1)};
uint32_t split_ind = 3;
bst_cat_t split_cat = 4;
float left_weight = 1.3f;
@@ -222,7 +218,7 @@ void TestCategoricalPrediction(std::string name) {
GenericParameter ctx;
ctx.UpdateAllowUnknown(Args{});
gbm::GBTreeModel model(&param, &ctx);
gbm::GBTreeModel model(&mparam, &ctx);
GBTreeModelForTest(&model, split_ind, split_cat, left_weight, right_weight);
ctx.UpdateAllowUnknown(Args{{"gpu_id", "0"}});
@@ -237,27 +233,24 @@ void TestCategoricalPrediction(std::string name) {
predictor->InitOutPredictions(m->Info(), &out_predictions.predictions, model);
predictor->PredictBatch(m.get(), &out_predictions, model, 0);
auto score = mparam.BaseScore(Context::kCpuId)(0);
ASSERT_EQ(out_predictions.predictions.Size(), 1ul);
ASSERT_EQ(out_predictions.predictions.HostVector()[0],
right_weight + param.base_score); // go to right for matching cat
right_weight + score); // go to right for matching cat
row[split_ind] = split_cat + 1;
m = GetDMatrixFromData(row, 1, kCols);
out_predictions.version = 0;
predictor->InitOutPredictions(m->Info(), &out_predictions.predictions, model);
predictor->PredictBatch(m.get(), &out_predictions, model, 0);
ASSERT_EQ(out_predictions.predictions.HostVector()[0],
left_weight + param.base_score);
ASSERT_EQ(out_predictions.predictions.HostVector()[0], left_weight + score);
}
void TestCategoricalPredictLeaf(StringView name) {
size_t constexpr kCols = 10;
PredictionCacheEntry out_predictions;
LearnerModelParam param;
param.num_feature = kCols;
param.num_output_group = 1;
param.base_score = 0.5;
LearnerModelParam mparam{MakeMP(kCols, .5, 1)};
uint32_t split_ind = 3;
bst_cat_t split_cat = 4;
@@ -267,7 +260,7 @@ void TestCategoricalPredictLeaf(StringView name) {
GenericParameter ctx;
ctx.UpdateAllowUnknown(Args{});
gbm::GBTreeModel model(&param, &ctx);
gbm::GBTreeModel model(&mparam, &ctx);
GBTreeModelForTest(&model, split_ind, split_cat, left_weight, right_weight);
ctx.gpu_id = 0;

View File

@@ -12,11 +12,7 @@ void TestPredictionFromGradientIndex(std::string name, size_t rows, size_t cols,
std::shared_ptr<DMatrix> p_hist) {
constexpr size_t kClasses { 3 };
LearnerModelParam param;
param.num_feature = cols;
param.num_output_group = kClasses;
param.base_score = 0.5;
LearnerModelParam mparam{MakeMP(cols, .5, kClasses)};
auto lparam = CreateEmptyGenericParam(0);
std::unique_ptr<Predictor> predictor =
@@ -25,7 +21,7 @@ void TestPredictionFromGradientIndex(std::string name, size_t rows, size_t cols,
GenericParameter ctx;
ctx.UpdateAllowUnknown(Args{});
gbm::GBTreeModel model = CreateTestModel(&param, &ctx, kClasses);
gbm::GBTreeModel model = CreateTestModel(&mparam, &ctx, kClasses);
{
auto p_precise = RandomDataGenerator(rows, cols, 0).GenerateDMatrix();