Calculate base_score based on input labels for mae. (#8107)
Fit an intercept as base score for abs loss.
This commit is contained in:
@@ -29,5 +29,15 @@ TEST(Numeric, PartialSum) {
|
||||
ASSERT_EQ(sol, result);
|
||||
}
|
||||
}
|
||||
|
||||
// Reduce the sequence 0, 1, ..., 19 with a default (CPU) context and compare the
// result with the closed form (n - 1) * n / 2.
TEST(Numeric, Reduce) {
  Context ctx;
  ASSERT_TRUE(ctx.IsCPU());

  HostDeviceVector<float> values(20);
  auto& host = values.HostVector();
  std::iota(host.begin(), host.end(), 0.0f);

  auto total = Reduce(&ctx, values);

  auto const n = values.Size();
  ASSERT_EQ(total, (n - 1) * n / 2);
}
|
||||
} // namespace common
|
||||
} // namespace xgboost
|
||||
|
||||
@@ -54,5 +54,20 @@ TEST(Stats, WeightedQuantile) {
|
||||
q = WeightedQuantile(1.0, beg, end, w);
|
||||
ASSERT_EQ(q, 5);
|
||||
}
|
||||
|
||||
// Median of {0, 0, 1, 2} with an empty weight vector must be exactly 0.5; the same
// check is repeated with gpu_id = 0 when the build has CUDA enabled.
TEST(Stats, Median) {
  float constexpr kExpected = .5f;

  linalg::Tensor<float, 2> values{{.0f, .0f, 1.f, 2.f}, {4}, Context::kCpuId};
  Context ctx;
  HostDeviceVector<float> weights;  // left empty on purpose

  auto median = Median(&ctx, values, weights);
  ASSERT_EQ(median, kExpected);

#if defined(XGBOOST_USE_CUDA)
  // Switch the same context to the first device and run the check again.
  ctx.gpu_id = 0;
  ASSERT_FALSE(ctx.IsCPU());
  median = Median(&ctx, values, weights);
  ASSERT_EQ(median, kExpected);
#endif  // defined(XGBOOST_USE_CUDA)
}
|
||||
} // namespace common
|
||||
} // namespace xgboost
|
||||
|
||||
@@ -19,15 +19,11 @@ namespace gbm {
|
||||
TEST(GBLinear, JsonIO) {
|
||||
size_t constexpr kRows = 16, kCols = 16;
|
||||
|
||||
LearnerModelParam param;
|
||||
param.num_feature = kCols;
|
||||
param.num_output_group = 1;
|
||||
Context ctx;
|
||||
LearnerModelParam mparam{MakeMP(kCols, .5, 1)};
|
||||
|
||||
GenericParameter gparam;
|
||||
gparam.Init(Args{});
|
||||
|
||||
std::unique_ptr<GradientBooster> gbm {
|
||||
CreateTrainedGBM("gblinear", Args{}, kRows, kCols, &param, &gparam) };
|
||||
std::unique_ptr<GradientBooster> gbm{
|
||||
CreateTrainedGBM("gblinear", Args{}, kRows, kCols, &mparam, &ctx)};
|
||||
Json model { Object() };
|
||||
gbm->SaveModel(&model);
|
||||
ASSERT_TRUE(IsA<Object>(model));
|
||||
|
||||
@@ -18,15 +18,11 @@ namespace xgboost {
|
||||
TEST(GBTree, SelectTreeMethod) {
|
||||
size_t constexpr kCols = 10;
|
||||
|
||||
GenericParameter generic_param;
|
||||
generic_param.UpdateAllowUnknown(Args{});
|
||||
LearnerModelParam mparam;
|
||||
mparam.base_score = 0.5;
|
||||
mparam.num_feature = kCols;
|
||||
mparam.num_output_group = 1;
|
||||
Context ctx;
|
||||
LearnerModelParam mparam{MakeMP(kCols, .5, 1)};
|
||||
|
||||
std::unique_ptr<GradientBooster> p_gbm {
|
||||
GradientBooster::Create("gbtree", &generic_param, &mparam)};
|
||||
GradientBooster::Create("gbtree", &ctx, &mparam)};
|
||||
auto& gbtree = dynamic_cast<gbm::GBTree&> (*p_gbm);
|
||||
|
||||
// Test if `tree_method` can be set
|
||||
@@ -45,7 +41,7 @@ TEST(GBTree, SelectTreeMethod) {
|
||||
ASSERT_EQ(tparam.updater_seq, "grow_quantile_histmaker");
|
||||
|
||||
#ifdef XGBOOST_USE_CUDA
|
||||
generic_param.UpdateAllowUnknown(Args{{"gpu_id", "0"}});
|
||||
ctx.UpdateAllowUnknown(Args{{"gpu_id", "0"}});
|
||||
gbtree.Configure({{"tree_method", "gpu_hist"}});
|
||||
ASSERT_EQ(tparam.updater_seq, "grow_gpu_hist");
|
||||
gbtree.Configure({{"booster", "dart"}, {"tree_method", "gpu_hist"}});
|
||||
@@ -55,15 +51,11 @@ TEST(GBTree, SelectTreeMethod) {
|
||||
|
||||
TEST(GBTree, PredictionCache) {
|
||||
size_t constexpr kRows = 100, kCols = 10;
|
||||
GenericParameter generic_param;
|
||||
generic_param.UpdateAllowUnknown(Args{});
|
||||
LearnerModelParam mparam;
|
||||
mparam.base_score = 0.5;
|
||||
mparam.num_feature = kCols;
|
||||
mparam.num_output_group = 1;
|
||||
Context ctx;
|
||||
LearnerModelParam mparam{MakeMP(kCols, .5, 1)};
|
||||
|
||||
std::unique_ptr<GradientBooster> p_gbm {
|
||||
GradientBooster::Create("gbtree", &generic_param, &mparam)};
|
||||
GradientBooster::Create("gbtree", &ctx, &mparam)};
|
||||
auto& gbtree = dynamic_cast<gbm::GBTree&> (*p_gbm);
|
||||
|
||||
gbtree.Configure({{"tree_method", "hist"}});
|
||||
@@ -176,16 +168,11 @@ TEST(GBTree, ChoosePredictor) {
|
||||
TEST(GBTree, JsonIO) {
|
||||
size_t constexpr kRows = 16, kCols = 16;
|
||||
|
||||
LearnerModelParam mparam;
|
||||
mparam.num_feature = kCols;
|
||||
mparam.num_output_group = 1;
|
||||
mparam.base_score = 0.5;
|
||||
|
||||
GenericParameter gparam;
|
||||
gparam.Init(Args{});
|
||||
Context ctx;
|
||||
LearnerModelParam mparam{MakeMP(kCols, .5, 1)};
|
||||
|
||||
std::unique_ptr<GradientBooster> gbm {
|
||||
CreateTrainedGBM("gbtree", Args{}, kRows, kCols, &mparam, &gparam) };
|
||||
CreateTrainedGBM("gbtree", Args{}, kRows, kCols, &mparam, &ctx) };
|
||||
|
||||
Json model {Object()};
|
||||
model["model"] = Object();
|
||||
@@ -215,16 +202,11 @@ TEST(GBTree, JsonIO) {
|
||||
TEST(Dart, JsonIO) {
|
||||
size_t constexpr kRows = 16, kCols = 16;
|
||||
|
||||
LearnerModelParam mparam;
|
||||
mparam.num_feature = kCols;
|
||||
mparam.base_score = 0.5;
|
||||
mparam.num_output_group = 1;
|
||||
Context ctx;
|
||||
LearnerModelParam mparam{MakeMP(kCols, .5, 1)};
|
||||
|
||||
GenericParameter gparam;
|
||||
gparam.Init(Args{});
|
||||
|
||||
std::unique_ptr<GradientBooster> gbm {
|
||||
CreateTrainedGBM("dart", Args{}, kRows, kCols, &mparam, &gparam) };
|
||||
std::unique_ptr<GradientBooster> gbm{
|
||||
CreateTrainedGBM("dart", Args{}, kRows, kCols, &mparam, &ctx)};
|
||||
|
||||
Json model {Object()};
|
||||
model["model"] = Object();
|
||||
|
||||
@@ -451,5 +451,16 @@ class RMMAllocator;
|
||||
using RMMAllocatorPtr = std::unique_ptr<RMMAllocator, void(*)(RMMAllocator*)>;
|
||||
RMMAllocatorPtr SetUpRMMResourceForCppTests(int argc, char** argv);
|
||||
|
||||
/*
|
||||
* \brief Make learner model param
|
||||
*/
|
||||
inline LearnerModelParam MakeMP(bst_feature_t n_features, float base_score, uint32_t n_groups,
|
||||
int32_t device = Context::kCpuId) {
|
||||
size_t shape[1]{1};
|
||||
LearnerModelParam mparam(n_features, linalg::Tensor<float, 1>{{base_score}, shape, device},
|
||||
n_groups);
|
||||
return mparam;
|
||||
}
|
||||
|
||||
} // namespace xgboost
|
||||
#endif
|
||||
|
||||
@@ -18,10 +18,7 @@ TEST(Linear, Shotgun) {
|
||||
auto p_fmat = xgboost::RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix();
|
||||
|
||||
auto lparam = xgboost::CreateEmptyGenericParam(GPUIDX);
|
||||
LearnerModelParam mparam;
|
||||
mparam.num_feature = kCols;
|
||||
mparam.num_output_group = 1;
|
||||
mparam.base_score = 0.5;
|
||||
LearnerModelParam mparam{MakeMP(kCols, .5, 1)};
|
||||
|
||||
{
|
||||
auto updater = std::unique_ptr<xgboost::LinearUpdater>(
|
||||
@@ -54,10 +51,7 @@ TEST(Linear, coordinate) {
|
||||
auto p_fmat = xgboost::RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix();
|
||||
|
||||
auto lparam = xgboost::CreateEmptyGenericParam(GPUIDX);
|
||||
LearnerModelParam mparam;
|
||||
mparam.num_feature = kCols;
|
||||
mparam.num_output_group = 1;
|
||||
mparam.base_score = 0.5;
|
||||
LearnerModelParam mparam{MakeMP(kCols, .5, 1)};
|
||||
|
||||
auto updater = std::unique_ptr<xgboost::LinearUpdater>(
|
||||
xgboost::LinearUpdater::Create("coord_descent", &lparam));
|
||||
|
||||
@@ -13,15 +13,11 @@ TEST(Linear, GPUCoordinate) {
|
||||
size_t constexpr kCols = 10;
|
||||
|
||||
auto mat = xgboost::RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix();
|
||||
auto lparam = CreateEmptyGenericParam(GPUIDX);
|
||||
|
||||
LearnerModelParam mparam;
|
||||
mparam.num_feature = kCols;
|
||||
mparam.num_output_group = 1;
|
||||
mparam.base_score = 0.5;
|
||||
auto ctx = CreateEmptyGenericParam(GPUIDX);
|
||||
|
||||
LearnerModelParam mparam{MakeMP(kCols, .5, 1)};
|
||||
auto updater = std::unique_ptr<xgboost::LinearUpdater>(
|
||||
xgboost::LinearUpdater::Create("gpu_coord_descent", &lparam));
|
||||
xgboost::LinearUpdater::Create("gpu_coord_descent", &ctx));
|
||||
updater->Configure({{"eta", "1."}});
|
||||
xgboost::HostDeviceVector<xgboost::GradientPair> gpair(
|
||||
mat->Info().num_row_, xgboost::GradientPair(-5, 1.0));
|
||||
@@ -36,4 +32,4 @@ TEST(Linear, GPUCoordinate) {
|
||||
// Runs the TestUpdaterJsonIO helper for the "gpu_coord_descent" updater.
// NOTE(review): presumably this checks the updater's JSON save/load round trip;
// confirm against the helper's definition.
TEST(GPUCoordinate, JsonIO) {
|
||||
TestUpdaterJsonIO("gpu_coord_descent");
|
||||
}
|
||||
} // namespace xgboost
|
||||
} // namespace xgboost
|
||||
|
||||
@@ -21,14 +21,11 @@ TEST(CpuPredictor, Basic) {
|
||||
size_t constexpr kRows = 5;
|
||||
size_t constexpr kCols = 5;
|
||||
|
||||
LearnerModelParam param;
|
||||
param.num_feature = kCols;
|
||||
param.base_score = 0.0;
|
||||
param.num_output_group = 1;
|
||||
LearnerModelParam mparam{MakeMP(kCols, .0, 1)};
|
||||
|
||||
GenericParameter ctx;
|
||||
ctx.UpdateAllowUnknown(Args{});
|
||||
gbm::GBTreeModel model = CreateTestModel(&param, &ctx);
|
||||
gbm::GBTreeModel model = CreateTestModel(&mparam, &ctx);
|
||||
|
||||
auto dmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix();
|
||||
|
||||
@@ -104,14 +101,11 @@ TEST(CpuPredictor, ExternalMemory) {
|
||||
std::unique_ptr<Predictor> cpu_predictor =
|
||||
std::unique_ptr<Predictor>(Predictor::Create("cpu_predictor", &lparam));
|
||||
|
||||
LearnerModelParam param;
|
||||
param.base_score = 0;
|
||||
param.num_feature = dmat->Info().num_col_;
|
||||
param.num_output_group = 1;
|
||||
LearnerModelParam mparam{MakeMP(dmat->Info().num_col_, .0, 1)};
|
||||
|
||||
GenericParameter ctx;
|
||||
ctx.UpdateAllowUnknown(Args{});
|
||||
gbm::GBTreeModel model = CreateTestModel(&param, &ctx);
|
||||
gbm::GBTreeModel model = CreateTestModel(&mparam, &ctx);
|
||||
|
||||
// Test predict batch
|
||||
PredictionCacheEntry out_predictions;
|
||||
@@ -201,16 +195,11 @@ TEST(CpuPredictor, InplacePredict) {
|
||||
|
||||
void TestUpdatePredictionCache(bool use_subsampling) {
|
||||
size_t constexpr kRows = 64, kCols = 16, kClasses = 4;
|
||||
LearnerModelParam mparam;
|
||||
mparam.num_feature = kCols;
|
||||
mparam.num_output_group = kClasses;
|
||||
mparam.base_score = 0;
|
||||
|
||||
GenericParameter gparam;
|
||||
gparam.Init(Args{});
|
||||
LearnerModelParam mparam{MakeMP(kCols, .0, kClasses)};
|
||||
Context ctx;
|
||||
|
||||
std::unique_ptr<gbm::GBTree> gbm;
|
||||
gbm.reset(static_cast<gbm::GBTree*>(GradientBooster::Create("gbtree", &gparam, &mparam)));
|
||||
gbm.reset(static_cast<gbm::GBTree*>(GradientBooster::Create("gbtree", &ctx, &mparam)));
|
||||
std::map<std::string, std::string> cfg;
|
||||
cfg["tree_method"] = "hist";
|
||||
cfg["predictor"] = "cpu_predictor";
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*!
|
||||
* Copyright 2017-2020 XGBoost contributors
|
||||
* Copyright 2017-2022 XGBoost contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
#include <xgboost/c_api.h>
|
||||
@@ -34,14 +34,10 @@ TEST(GPUPredictor, Basic) {
|
||||
int n_row = i, n_col = i;
|
||||
auto dmat = RandomDataGenerator(n_row, n_col, 0).GenerateDMatrix();
|
||||
|
||||
LearnerModelParam param;
|
||||
param.num_feature = n_col;
|
||||
param.num_output_group = 1;
|
||||
param.base_score = 0.5;
|
||||
|
||||
GenericParameter ctx;
|
||||
ctx.UpdateAllowUnknown(Args{});
|
||||
gbm::GBTreeModel model = CreateTestModel(&param, &ctx);
|
||||
Context ctx;
|
||||
ctx.gpu_id = 0;
|
||||
LearnerModelParam mparam{MakeMP(n_col, .5, 1, ctx.gpu_id)};
|
||||
gbm::GBTreeModel model = CreateTestModel(&mparam, &ctx);
|
||||
|
||||
// Test predict batch
|
||||
PredictionCacheEntry gpu_out_predictions;
|
||||
@@ -93,15 +89,12 @@ TEST(GPUPredictor, ExternalMemoryTest) {
|
||||
std::unique_ptr<Predictor>(Predictor::Create("gpu_predictor", &lparam));
|
||||
gpu_predictor->Configure({});
|
||||
|
||||
LearnerModelParam param;
|
||||
param.num_feature = 5;
|
||||
const int n_classes = 3;
|
||||
param.num_output_group = n_classes;
|
||||
param.base_score = 0.5;
|
||||
Context ctx;
|
||||
ctx.gpu_id = 0;
|
||||
LearnerModelParam mparam{MakeMP(5, .5, n_classes, ctx.gpu_id)};
|
||||
|
||||
GenericParameter ctx;
|
||||
ctx.UpdateAllowUnknown(Args{});
|
||||
gbm::GBTreeModel model = CreateTestModel(&param, &ctx, n_classes);
|
||||
gbm::GBTreeModel model = CreateTestModel(&mparam, &ctx, n_classes);
|
||||
std::vector<std::unique_ptr<DMatrix>> dmats;
|
||||
|
||||
dmats.push_back(CreateSparsePageDMatrix(400));
|
||||
@@ -171,15 +164,10 @@ TEST(GpuPredictor, LesserFeatures) {
|
||||
TEST(GPUPredictor, ShapStump) {
|
||||
cudaSetDevice(0);
|
||||
|
||||
LearnerModelParam param;
|
||||
param.num_feature = 1;
|
||||
param.num_output_group = 1;
|
||||
param.base_score = 0.5;
|
||||
|
||||
GenericParameter ctx;
|
||||
ctx.UpdateAllowUnknown(Args{});
|
||||
|
||||
gbm::GBTreeModel model(&param, &ctx);
|
||||
Context ctx;
|
||||
ctx.gpu_id = 0;
|
||||
LearnerModelParam mparam{MakeMP(1, .5, 1, ctx.gpu_id)};
|
||||
gbm::GBTreeModel model(&mparam, &ctx);
|
||||
|
||||
std::vector<std::unique_ptr<RegTree>> trees;
|
||||
trees.push_back(std::unique_ptr<RegTree>(new RegTree));
|
||||
@@ -193,24 +181,20 @@ TEST(GPUPredictor, ShapStump) {
|
||||
auto dmat = RandomDataGenerator(3, 1, 0).GenerateDMatrix();
|
||||
gpu_predictor->PredictContribution(dmat.get(), &predictions, model);
|
||||
auto& phis = predictions.HostVector();
|
||||
auto base_score = mparam.BaseScore(Context::kCpuId)(0);
|
||||
EXPECT_EQ(phis[0], 0.0);
|
||||
EXPECT_EQ(phis[1], param.base_score);
|
||||
EXPECT_EQ(phis[1], base_score);
|
||||
EXPECT_EQ(phis[2], 0.0);
|
||||
EXPECT_EQ(phis[3], param.base_score);
|
||||
EXPECT_EQ(phis[3], base_score);
|
||||
EXPECT_EQ(phis[4], 0.0);
|
||||
EXPECT_EQ(phis[5], param.base_score);
|
||||
EXPECT_EQ(phis[5], base_score);
|
||||
}
|
||||
|
||||
TEST(GPUPredictor, Shap) {
|
||||
LearnerModelParam param;
|
||||
param.num_feature = 1;
|
||||
param.num_output_group = 1;
|
||||
param.base_score = 0.5;
|
||||
|
||||
GenericParameter ctx;
|
||||
ctx.UpdateAllowUnknown(Args{});
|
||||
|
||||
gbm::GBTreeModel model(&param, &ctx);
|
||||
Context ctx;
|
||||
ctx.gpu_id = 0;
|
||||
LearnerModelParam mparam{MakeMP(1, .5, 1, ctx.gpu_id)};
|
||||
gbm::GBTreeModel model(&mparam, &ctx);
|
||||
|
||||
std::vector<std::unique_ptr<RegTree>> trees;
|
||||
trees.push_back(std::unique_ptr<RegTree>(new RegTree));
|
||||
@@ -258,14 +242,9 @@ TEST(GPUPredictor, PredictLeafBasic) {
|
||||
std::unique_ptr<Predictor>(Predictor::Create("gpu_predictor", &lparam));
|
||||
gpu_predictor->Configure({});
|
||||
|
||||
LearnerModelParam param;
|
||||
param.num_feature = kCols;
|
||||
param.base_score = 0.0;
|
||||
param.num_output_group = 1;
|
||||
|
||||
GenericParameter ctx;
|
||||
ctx.UpdateAllowUnknown(Args{});
|
||||
gbm::GBTreeModel model = CreateTestModel(&param, &ctx);
|
||||
LearnerModelParam mparam{MakeMP(kCols, .0, 1)};
|
||||
Context ctx;
|
||||
gbm::GBTreeModel model = CreateTestModel(&mparam, &ctx);
|
||||
|
||||
HostDeviceVector<float> leaf_out_predictions;
|
||||
gpu_predictor->PredictLeaf(dmat.get(), &leaf_out_predictions, model);
|
||||
|
||||
@@ -210,11 +210,7 @@ void TestCategoricalPrediction(std::string name) {
|
||||
size_t constexpr kCols = 10;
|
||||
PredictionCacheEntry out_predictions;
|
||||
|
||||
LearnerModelParam param;
|
||||
param.num_feature = kCols;
|
||||
param.num_output_group = 1;
|
||||
param.base_score = 0.5;
|
||||
|
||||
LearnerModelParam mparam{MakeMP(kCols, .5, 1)};
|
||||
uint32_t split_ind = 3;
|
||||
bst_cat_t split_cat = 4;
|
||||
float left_weight = 1.3f;
|
||||
@@ -222,7 +218,7 @@ void TestCategoricalPrediction(std::string name) {
|
||||
|
||||
GenericParameter ctx;
|
||||
ctx.UpdateAllowUnknown(Args{});
|
||||
gbm::GBTreeModel model(&param, &ctx);
|
||||
gbm::GBTreeModel model(&mparam, &ctx);
|
||||
GBTreeModelForTest(&model, split_ind, split_cat, left_weight, right_weight);
|
||||
|
||||
ctx.UpdateAllowUnknown(Args{{"gpu_id", "0"}});
|
||||
@@ -237,27 +233,24 @@ void TestCategoricalPrediction(std::string name) {
|
||||
|
||||
predictor->InitOutPredictions(m->Info(), &out_predictions.predictions, model);
|
||||
predictor->PredictBatch(m.get(), &out_predictions, model, 0);
|
||||
auto score = mparam.BaseScore(Context::kCpuId)(0);
|
||||
ASSERT_EQ(out_predictions.predictions.Size(), 1ul);
|
||||
ASSERT_EQ(out_predictions.predictions.HostVector()[0],
|
||||
right_weight + param.base_score); // go to right for matching cat
|
||||
right_weight + score); // go to right for matching cat
|
||||
|
||||
row[split_ind] = split_cat + 1;
|
||||
m = GetDMatrixFromData(row, 1, kCols);
|
||||
out_predictions.version = 0;
|
||||
predictor->InitOutPredictions(m->Info(), &out_predictions.predictions, model);
|
||||
predictor->PredictBatch(m.get(), &out_predictions, model, 0);
|
||||
ASSERT_EQ(out_predictions.predictions.HostVector()[0],
|
||||
left_weight + param.base_score);
|
||||
ASSERT_EQ(out_predictions.predictions.HostVector()[0], left_weight + score);
|
||||
}
|
||||
|
||||
void TestCategoricalPredictLeaf(StringView name) {
|
||||
size_t constexpr kCols = 10;
|
||||
PredictionCacheEntry out_predictions;
|
||||
|
||||
LearnerModelParam param;
|
||||
param.num_feature = kCols;
|
||||
param.num_output_group = 1;
|
||||
param.base_score = 0.5;
|
||||
LearnerModelParam mparam{MakeMP(kCols, .5, 1)};
|
||||
|
||||
uint32_t split_ind = 3;
|
||||
bst_cat_t split_cat = 4;
|
||||
@@ -267,7 +260,7 @@ void TestCategoricalPredictLeaf(StringView name) {
|
||||
GenericParameter ctx;
|
||||
ctx.UpdateAllowUnknown(Args{});
|
||||
|
||||
gbm::GBTreeModel model(&param, &ctx);
|
||||
gbm::GBTreeModel model(&mparam, &ctx);
|
||||
GBTreeModelForTest(&model, split_ind, split_cat, left_weight, right_weight);
|
||||
|
||||
ctx.gpu_id = 0;
|
||||
|
||||
@@ -12,11 +12,7 @@ void TestPredictionFromGradientIndex(std::string name, size_t rows, size_t cols,
|
||||
std::shared_ptr<DMatrix> p_hist) {
|
||||
constexpr size_t kClasses { 3 };
|
||||
|
||||
LearnerModelParam param;
|
||||
param.num_feature = cols;
|
||||
param.num_output_group = kClasses;
|
||||
param.base_score = 0.5;
|
||||
|
||||
LearnerModelParam mparam{MakeMP(cols, .5, kClasses)};
|
||||
auto lparam = CreateEmptyGenericParam(0);
|
||||
|
||||
std::unique_ptr<Predictor> predictor =
|
||||
@@ -25,7 +21,7 @@ void TestPredictionFromGradientIndex(std::string name, size_t rows, size_t cols,
|
||||
|
||||
GenericParameter ctx;
|
||||
ctx.UpdateAllowUnknown(Args{});
|
||||
gbm::GBTreeModel model = CreateTestModel(&param, &ctx, kClasses);
|
||||
gbm::GBTreeModel model = CreateTestModel(&mparam, &ctx, kClasses);
|
||||
|
||||
{
|
||||
auto p_precise = RandomDataGenerator(rows, cols, 0).GenerateDMatrix();
|
||||
|
||||
@@ -3,8 +3,10 @@
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
#include <xgboost/learner.h>
|
||||
#include <xgboost/objective.h> // ObjFunction
|
||||
#include <xgboost/version_config.h>
|
||||
|
||||
#include <string> // std::stof, std::string
|
||||
#include <thread>
|
||||
#include <vector>
|
||||
|
||||
@@ -206,8 +208,7 @@ TEST(Learner, MultiThreadedPredict) {
|
||||
p_dmat->Info().labels.Reshape(kRows);
|
||||
CHECK_NE(p_dmat->Info().num_col_, 0);
|
||||
|
||||
std::shared_ptr<DMatrix> p_data{
|
||||
RandomDataGenerator{kRows, kCols, 0}.GenerateDMatrix()};
|
||||
std::shared_ptr<DMatrix> p_data{RandomDataGenerator{kRows, kCols, 0}.GenerateDMatrix()};
|
||||
CHECK_NE(p_data->Info().num_col_, 0);
|
||||
|
||||
std::shared_ptr<Learner> learner{Learner::Create({p_dmat})};
|
||||
@@ -448,4 +449,77 @@ TEST(Learner, MultiTarget) {
|
||||
EXPECT_THROW({ learner->Configure(); }, dmlc::Error);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Test the model initialization sequence is correctly performed.
|
||||
*/
|
||||
TEST(Learner, InitEstimation) {
|
||||
size_t constexpr kCols = 10;
|
||||
auto Xy = RandomDataGenerator{10, kCols, 0}.GenerateDMatrix(true);
|
||||
|
||||
{
|
||||
std::unique_ptr<Learner> learner{Learner::Create({Xy})};
|
||||
learner->SetParam("objective", "reg:absoluteerror");
|
||||
learner->Configure();
|
||||
HostDeviceVector<float> predt;
|
||||
learner->Predict(Xy, false, &predt, 0, 0);
|
||||
|
||||
auto h_predt = predt.ConstHostSpan();
|
||||
for (auto v : h_predt) {
|
||||
ASSERT_EQ(v, ObjFunction::DefaultBaseScore());
|
||||
}
|
||||
Json config{Object{}};
|
||||
learner->SaveConfig(&config);
|
||||
auto base_score =
|
||||
std::stof(get<String const>(config["learner"]["learner_model_param"]["base_score"]));
|
||||
// No base score is estimated yet.
|
||||
ASSERT_EQ(base_score, ObjFunction::DefaultBaseScore());
|
||||
}
|
||||
|
||||
{
|
||||
std::unique_ptr<Learner> learner{Learner::Create({Xy})};
|
||||
learner->SetParam("objective", "reg:absoluteerror");
|
||||
learner->UpdateOneIter(0, Xy);
|
||||
|
||||
HostDeviceVector<float> predt;
|
||||
learner->Predict(Xy, false, &predt, 0, 0);
|
||||
auto h_predt = predt.ConstHostSpan();
|
||||
for (auto v : h_predt) {
|
||||
ASSERT_NE(v, ObjFunction::DefaultBaseScore());
|
||||
}
|
||||
|
||||
Json config{Object{}};
|
||||
learner->SaveConfig(&config);
|
||||
auto base_score =
|
||||
std::stof(get<String const>(config["learner"]["learner_model_param"]["base_score"]));
|
||||
ASSERT_NE(base_score, ObjFunction::DefaultBaseScore());
|
||||
|
||||
ASSERT_THROW(
|
||||
{
|
||||
learner->SetParam("base_score_estimated", "1");
|
||||
learner->Configure();
|
||||
},
|
||||
dmlc::Error);
|
||||
}
|
||||
|
||||
{
|
||||
std::unique_ptr<Learner> learner{Learner::Create({Xy})};
|
||||
learner->SetParam("objective", "reg:absoluteerror");
|
||||
learner->SetParam("base_score", "1.3");
|
||||
learner->Configure();
|
||||
HostDeviceVector<float> predt;
|
||||
learner->Predict(Xy, false, &predt, 0, 0);
|
||||
auto h_predt = predt.ConstHostSpan();
|
||||
for (auto v : h_predt) {
|
||||
ASSERT_FLOAT_EQ(v, 1.3);
|
||||
}
|
||||
learner->UpdateOneIter(0, Xy);
|
||||
Json config{Object{}};
|
||||
learner->SaveConfig(&config);
|
||||
auto base_score =
|
||||
std::stof(get<String const>(config["learner"]["learner_model_param"]["base_score"]));
|
||||
// no change
|
||||
ASSERT_FLOAT_EQ(base_score, 1.3);
|
||||
}
|
||||
}
|
||||
} // namespace xgboost
|
||||
|
||||
@@ -418,6 +418,45 @@ TEST_F(SerializationTest, GPUCoordDescent) {
|
||||
}
|
||||
#endif // defined(XGBOOST_USE_CUDA)
|
||||
|
||||
// Fixture for the reg:absoluteerror serialization tests; adds nothing on top of SerializationTest.
class L1SerializationTest : public SerializationTest {};
|
||||
|
||||
// Learner serialization with the absolute error objective and tree_method = exact.
TEST_F(L1SerializationTest, Exact) {
  Args const config{{"booster", "gbtree"},
                    {"objective", "reg:absoluteerror"},
                    {"seed", "0"},
                    {"max_depth", "2"},
                    {"tree_method", "exact"}};
  TestLearnerSerialization(config, fmap_, p_dmat_);
}
|
||||
|
||||
// Learner serialization with the absolute error objective and tree_method = approx.
TEST_F(L1SerializationTest, Approx) {
  Args const config{{"booster", "gbtree"},
                    {"objective", "reg:absoluteerror"},
                    {"seed", "0"},
                    {"max_depth", "2"},
                    {"tree_method", "approx"}};
  TestLearnerSerialization(config, fmap_, p_dmat_);
}
|
||||
|
||||
// Learner serialization with the absolute error objective and tree_method = hist.
TEST_F(L1SerializationTest, Hist) {
  Args const config{{"booster", "gbtree"},
                    {"objective", "reg:absoluteerror"},
                    {"seed", "0"},
                    {"max_depth", "2"},
                    {"tree_method", "hist"}};
  TestLearnerSerialization(config, fmap_, p_dmat_);
}
|
||||
|
||||
#if defined(XGBOOST_USE_CUDA)
// Learner serialization with the absolute error objective and tree_method = gpu_hist;
// compiled only when CUDA support is enabled.
TEST_F(L1SerializationTest, GpuHist) {
  Args const config{{"booster", "gbtree"},
                    {"objective", "reg:absoluteerror"},
                    {"seed", "0"},
                    {"max_depth", "2"},
                    {"tree_method", "gpu_hist"}};
  TestLearnerSerialization(config, fmap_, p_dmat_);
}
#endif  // defined(XGBOOST_USE_CUDA)
|
||||
|
||||
class LogitSerializationTest : public SerializationTest {
|
||||
protected:
|
||||
|
||||
@@ -208,3 +208,8 @@ class TestGPUUpdaters:
|
||||
param = dataset.set_params(param)
|
||||
result = train_result(param, dataset.get_dmat(), 10)
|
||||
assert tm.non_increasing(result['train'][dataset.metric])
|
||||
|
||||
# Delegates to self.cputest.run_adaptive with the "gpu_hist" tree method, once for
# each value of `weighted`; the skip condition comes from tm.no_sklearn().
@pytest.mark.skipif(**tm.no_sklearn())
|
||||
@pytest.mark.parametrize("weighted", [True, False])
|
||||
def test_adaptive(self, weighted) -> None:
|
||||
self.cputest.run_adaptive("gpu_hist", weighted)
|
||||
|
||||
@@ -102,34 +102,38 @@ def run_scikit_model_check(name, path):
|
||||
|
||||
@pytest.mark.skipif(**tm.no_sklearn())
|
||||
def test_model_compatibility():
|
||||
'''Test model compatibility, can only be run on CI as others don't
|
||||
"""Test model compatibility, can only be run on CI as others don't
|
||||
have the credentials.
|
||||
|
||||
'''
|
||||
"""
|
||||
path = os.path.dirname(os.path.abspath(__file__))
|
||||
path = os.path.join(path, 'models')
|
||||
path = os.path.join(path, "models")
|
||||
|
||||
zip_path, _ = urllib.request.urlretrieve('https://xgboost-ci-jenkins-artifacts.s3-us-west-2' +
|
||||
'.amazonaws.com/xgboost_model_compatibility_test.zip')
|
||||
with zipfile.ZipFile(zip_path, 'r') as z:
|
||||
z.extractall(path)
|
||||
if not os.path.exists(path):
|
||||
zip_path, _ = urllib.request.urlretrieve(
|
||||
"https://xgboost-ci-jenkins-artifacts.s3-us-west-2"
|
||||
+ ".amazonaws.com/xgboost_model_compatibility_test.zip"
|
||||
)
|
||||
with zipfile.ZipFile(zip_path, "r") as z:
|
||||
z.extractall(path)
|
||||
|
||||
models = [
|
||||
os.path.join(root, f) for root, subdir, files in os.walk(path)
|
||||
os.path.join(root, f)
|
||||
for root, subdir, files in os.walk(path)
|
||||
for f in files
|
||||
if f != 'version'
|
||||
if f != "version"
|
||||
]
|
||||
assert models
|
||||
|
||||
for path in models:
|
||||
name = os.path.basename(path)
|
||||
if name.startswith('xgboost-'):
|
||||
if name.startswith("xgboost-"):
|
||||
booster = xgboost.Booster(model_file=path)
|
||||
run_booster_check(booster, name)
|
||||
# Do full serialization.
|
||||
booster = copy.copy(booster)
|
||||
run_booster_check(booster, name)
|
||||
elif name.startswith('xgboost_scikit'):
|
||||
elif name.startswith("xgboost_scikit"):
|
||||
run_scikit_model_check(name, path)
|
||||
else:
|
||||
assert False
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
from random import choice
|
||||
import json
|
||||
from string import ascii_lowercase
|
||||
from typing import Dict, Any
|
||||
import testing as tm
|
||||
@@ -397,3 +397,72 @@ class TestTreeMethod:
|
||||
def test_categorical_missing(self, rows, cols, cats):
|
||||
self.run_categorical_missing(rows, cols, cats, "approx")
|
||||
self.run_categorical_missing(rows, cols, cats, "hist")
|
||||
|
||||
def run_adaptive(self, tree_method, weighted) -> None:
    """Check the base score stored for ``reg:absoluteerror`` after one boosting round.

    A booster trained with ``base_score`` set to the (weighted) median of the labels
    must store the same base score as a booster trained without ``base_score``.
    That score must be unchanged after reloading the second booster from its
    "deprecated" and "ubj" raw formats, and a user value of median + 1 must be
    stored as given.
    """
    rng = np.random.RandomState(1994)
    from sklearn.datasets import make_regression
    from sklearn.utils import stats

    n_samples = 256
    X, y = make_regression(n_samples, 16, random_state=rng)
    if not weighted:
        Xy = xgb.DMatrix(X, y)
        base_score = np.median(y)
    else:
        w = rng.normal(size=n_samples)
        w -= w.min()
        Xy = xgb.DMatrix(X, y, weight=w)
        base_score = stats._weighted_percentile(y, w, percentile=50)

    def train_one_round(params: Dict) -> xgb.Booster:
        return xgb.train(params, Xy, num_boost_round=1)

    def get_score(booster: xgb.Booster) -> float:
        config = json.loads(booster.save_config())
        return float(config["learner"]["learner_model_param"]["base_score"])

    # Explicit median versus the value picked without a user supplied base score.
    with_median = train_one_round(
        {
            "tree_method": tree_method,
            "base_score": base_score,
            "objective": "reg:absoluteerror",
        }
    )
    estimated = train_one_round(
        {"tree_method": tree_method, "objective": "reg:absoluteerror"}
    )
    score_with_median = get_score(with_median)
    score_estimated = get_score(estimated)
    assert score_with_median == score_estimated

    # The stored score must survive a round trip through both raw formats.
    for raw_format in ("deprecated", "ubj"):
        raw_booster = estimated.save_raw(raw_format=raw_format)
        reloaded = xgb.Booster(model_file=raw_booster)
        assert score_estimated == get_score(reloaded)

    # A user supplied base score must be stored as given.
    shifted = train_one_round(
        {
            "tree_method": tree_method,
            "base_score": base_score + 1.0,
            "objective": "reg:absoluteerror",
        }
    )
    np.testing.assert_allclose(get_score(shifted), score_estimated + 1)
|
||||
|
||||
# Runs self.run_adaptive for the "approx" and "hist" tree methods, each with and
# without sample weights; the skip condition comes from tm.no_sklearn().
@pytest.mark.skipif(**tm.no_sklearn())
|
||||
@pytest.mark.parametrize(
|
||||
"tree_method,weighted", [
|
||||
("approx", False), ("hist", False), ("approx", True), ("hist", True)
|
||||
]
|
||||
)
|
||||
def test_adaptive(self, tree_method, weighted) -> None:
|
||||
self.run_adaptive(tree_method, weighted)
|
||||
|
||||
@@ -1537,13 +1537,56 @@ class TestWithDask:
|
||||
@pytest.mark.skipif(**tm.no_dask())
|
||||
@pytest.mark.gtest
|
||||
def test_quantile_same_on_all_workers(self) -> None:
|
||||
self.run_quantile('SameOnAllWorkers')
|
||||
self.run_quantile("SameOnAllWorkers")
|
||||
|
||||
def test_adaptive(self) -> None:
    """Check the base score stored for ``reg:absoluteerror`` across two workers.

    Worker 0 holds three samples labelled 0.0 and every other worker holds one
    sample labelled 1000.0.  After one boosting round inside a shared rabit
    context, each worker asserts that its saved base score is exactly 250.0.
    """

    def local_test(rabit_args: List[bytes], worker_id: int) -> bool:
        with xgb.dask.RabitContext(rabit_args):
            if worker_id != 0:
                y = np.array([1000.0])
                x = np.array([[0.0]])
            else:
                y = np.array([0.0, 0.0, 0.0])
                x = np.array([[0.0]] * 3)

            Xy = xgb.DMatrix(x, y)
            params = {"tree_method": "hist", "objective": "reg:absoluteerror"}
            booster = xgb.train(params, Xy, num_boost_round=1)
            config = json.loads(booster.save_config())
            base_score = float(
                config["learner"]["learner_model_param"]["base_score"]
            )
            assert base_score == 250.0
            return True

    with LocalCluster(n_workers=2, dashboard_address=":0") as cluster, Client(
        cluster
    ) as client:
        workers = _get_client_workers(client)
        rabit_args = client.sync(
            xgb.dask._get_rabit_args, len(workers), None, client
        )
        # One task per worker; the task index doubles as the worker id.
        futures = [
            client.submit(local_test, rabit_args, worker_id)
            for worker_id in range(len(workers))
        ]
        results = client.gather(futures)
        assert all(results)
|
||||
|
||||
def test_n_workers(self) -> None:
|
||||
with LocalCluster(n_workers=2, dashboard_address=":0") as cluster:
|
||||
with Client(cluster) as client:
|
||||
workers = _get_client_workers(client)
|
||||
from sklearn.datasets import load_breast_cancer
|
||||
|
||||
X, y = load_breast_cancer(return_X_y=True)
|
||||
dX = client.submit(da.from_array, X, workers=[workers[0]]).result()
|
||||
dy = client.submit(da.from_array, y, workers=[workers[0]]).result()
|
||||
|
||||
Reference in New Issue
Block a user