De-duplicate GPU parameters. (#4454)

* Only define `gpu_id` and `n_gpus` in `LearnerTrainParam`
* Pass LearnerTrainParam through XGBoost via factory method.
* Disable all GPU usage when GPU-related parameters are not specified (fixes XGBoost choosing the GPU too aggressively).
* Test learner train param I/O.
* Fix GPU pickling.
This commit is contained in:
Jiaming Yuan
2019-05-29 11:55:57 +08:00
committed by GitHub
parent a3fedbeaa8
commit c589eff941
69 changed files with 927 additions and 562 deletions

View File

@@ -1,21 +1,14 @@
/*!
* Copyright 2018 XGBoost contributors
* Copyright 2018-2019 XGBoost contributors
*/
#include <xgboost/metric.h>
#include <map>
#include "../helpers.h"
using Arg = std::pair<std::string, std::string>;
#if defined(__CUDACC__)
#define N_GPU() Arg{"n_gpus", "1"}
#else
#define N_GPU() Arg{"n_gpus", "0"}
#endif
TEST(Metric, DeclareUnifiedTest(RMSE)) {
xgboost::Metric * metric = xgboost::Metric::Create("rmse");
metric->Configure({N_GPU()});
auto lparam = xgboost::CreateEmptyGenericParam(0, NGPUS);
xgboost::Metric * metric = xgboost::Metric::Create("rmse", &lparam);
metric->Configure({});
ASSERT_STREQ(metric->Name(), "rmse");
EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}), 0, 1e-10);
EXPECT_NEAR(GetMetricEval(metric,
@@ -27,8 +20,9 @@ TEST(Metric, DeclareUnifiedTest(RMSE)) {
}
TEST(Metric, DeclareUnifiedTest(MAE)) {
xgboost::Metric * metric = xgboost::Metric::Create("mae");
metric->Configure({N_GPU()});
auto lparam = xgboost::CreateEmptyGenericParam(0, NGPUS);
xgboost::Metric * metric = xgboost::Metric::Create("mae", &lparam);
metric->Configure({});
ASSERT_STREQ(metric->Name(), "mae");
EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}), 0, 1e-10);
EXPECT_NEAR(GetMetricEval(metric,
@@ -39,8 +33,9 @@ TEST(Metric, DeclareUnifiedTest(MAE)) {
}
TEST(Metric, DeclareUnifiedTest(LogLoss)) {
xgboost::Metric * metric = xgboost::Metric::Create("logloss");
metric->Configure({N_GPU()});
auto lparam = xgboost::CreateEmptyGenericParam(0, NGPUS);
xgboost::Metric * metric = xgboost::Metric::Create("logloss", &lparam);
metric->Configure({});
ASSERT_STREQ(metric->Name(), "logloss");
EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}), 0, 1e-10);
EXPECT_NEAR(GetMetricEval(metric,
@@ -51,8 +46,9 @@ TEST(Metric, DeclareUnifiedTest(LogLoss)) {
}
TEST(Metric, DeclareUnifiedTest(Error)) {
xgboost::Metric * metric = xgboost::Metric::Create("error");
metric->Configure({N_GPU()});
auto lparam = xgboost::CreateEmptyGenericParam(0, NGPUS);
xgboost::Metric * metric = xgboost::Metric::Create("error", &lparam);
metric->Configure({});
ASSERT_STREQ(metric->Name(), "error");
EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}), 0, 1e-10);
EXPECT_NEAR(GetMetricEval(metric,
@@ -60,17 +56,17 @@ TEST(Metric, DeclareUnifiedTest(Error)) {
{ 0, 0, 1, 1}),
0.5f, 0.001f);
EXPECT_ANY_THROW(xgboost::Metric::Create("error@abc"));
EXPECT_ANY_THROW(xgboost::Metric::Create("error@abc", &lparam));
delete metric;
metric = xgboost::Metric::Create("error@0.5f");
metric->Configure({N_GPU()});
metric = xgboost::Metric::Create("error@0.5f", &lparam);
metric->Configure({});
EXPECT_STREQ(metric->Name(), "error");
delete metric;
metric = xgboost::Metric::Create("error@0.1");
metric->Configure({N_GPU()});
metric = xgboost::Metric::Create("error@0.1", &lparam);
metric->Configure({});
ASSERT_STREQ(metric->Name(), "error@0.1");
EXPECT_STREQ(metric->Name(), "error@0.1");
EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}), 0, 1e-10);
@@ -82,8 +78,9 @@ TEST(Metric, DeclareUnifiedTest(Error)) {
}
TEST(Metric, DeclareUnifiedTest(PoissionNegLogLik)) {
xgboost::Metric * metric = xgboost::Metric::Create("poisson-nloglik");
metric->Configure({N_GPU()});
auto lparam = xgboost::CreateEmptyGenericParam(0, NGPUS);
xgboost::Metric * metric = xgboost::Metric::Create("poisson-nloglik", &lparam);
metric->Configure({});
ASSERT_STREQ(metric->Name(), "poisson-nloglik");
EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}), 0.5f, 1e-10);
EXPECT_NEAR(GetMetricEval(metric,
@@ -96,8 +93,9 @@ TEST(Metric, DeclareUnifiedTest(PoissionNegLogLik)) {
#if defined(XGBOOST_USE_NCCL) && defined(__CUDACC__)
TEST(Metric, MGPU_RMSE) {
{
xgboost::Metric * metric = xgboost::Metric::Create("rmse");
metric->Configure({Arg{"n_gpus", "-1"}});
auto lparam = xgboost::CreateEmptyGenericParam(0, -1);
xgboost::Metric * metric = xgboost::Metric::Create("rmse", &lparam);
metric->Configure({});
ASSERT_STREQ(metric->Name(), "rmse");
EXPECT_NEAR(GetMetricEval(metric, {0}, {0}), 0, 1e-10);
EXPECT_NEAR(GetMetricEval(metric,
@@ -108,8 +106,8 @@ TEST(Metric, MGPU_RMSE) {
}
{
xgboost::Metric * metric = xgboost::Metric::Create("rmse");
metric->Configure({Arg{"n_gpus", "-1"}, Arg{"gpu_id", "1"}});
auto lparam = xgboost::CreateEmptyGenericParam(1, -1);
xgboost::Metric * metric = xgboost::Metric::Create("rmse", &lparam);
ASSERT_STREQ(metric->Name(), "rmse");
EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}), 0, 1e-10);
EXPECT_NEAR(GetMetricEval(metric,

View File

@@ -4,15 +4,16 @@
#include "../helpers.h"
TEST(Metric, UnknownMetric) {
auto tparam = xgboost::CreateEmptyGenericParam(0, 0);
xgboost::Metric * metric = nullptr;
EXPECT_ANY_THROW(metric = xgboost::Metric::Create("unknown_name"));
EXPECT_NO_THROW(metric = xgboost::Metric::Create("rmse"));
EXPECT_ANY_THROW(metric = xgboost::Metric::Create("unknown_name", &tparam));
EXPECT_NO_THROW(metric = xgboost::Metric::Create("rmse", &tparam));
if (metric) {
delete metric;
}
metric = nullptr;
EXPECT_ANY_THROW(metric = xgboost::Metric::Create("unknown_name@1"));
EXPECT_NO_THROW(metric = xgboost::Metric::Create("error@0.5f"));
EXPECT_ANY_THROW(metric = xgboost::Metric::Create("unknown_name@1", &tparam));
EXPECT_NO_THROW(metric = xgboost::Metric::Create("error@0.5f", &tparam));
if (metric) {
delete metric;
}

View File

@@ -4,17 +4,12 @@
#include "../helpers.h"
using Arg = std::pair<std::string, std::string>;
#if defined(__CUDACC__)
#define N_GPU() Arg{"n_gpus", "1"}
#else
#define N_GPU() Arg{"n_gpus", "0"}
#endif
inline void TestMultiClassError(std::vector<Arg> args) {
xgboost::Metric * metric = xgboost::Metric::Create("merror");
metric->Configure(args);
inline void TestMultiClassError(xgboost::GPUSet const& devices) {
auto lparam = xgboost::CreateEmptyGenericParam(0, NGPUS);
lparam.gpu_id = *devices.begin();
lparam.n_gpus = devices.Size();
xgboost::Metric * metric = xgboost::Metric::Create("merror", &lparam);
metric->Configure({});
ASSERT_STREQ(metric->Name(), "merror");
EXPECT_ANY_THROW(GetMetricEval(metric, {0}, {0, 0}));
EXPECT_NEAR(GetMetricEval(
@@ -28,12 +23,16 @@ inline void TestMultiClassError(std::vector<Arg> args) {
}
TEST(Metric, DeclareUnifiedTest(MultiClassError)) {
TestMultiClassError({N_GPU()});
auto devices = xgboost::GPUSet::Range(0, NGPUS);
TestMultiClassError(devices);
}
inline void TestMultiClassLogLoss(std::vector<Arg> args) {
xgboost::Metric * metric = xgboost::Metric::Create("mlogloss");
metric->Configure(args);
inline void TestMultiClassLogLoss(xgboost::GPUSet const& devices) {
auto lparam = xgboost::CreateEmptyGenericParam(0, NGPUS);
lparam.gpu_id = *devices.begin();
lparam.n_gpus = devices.Size();
xgboost::Metric * metric = xgboost::Metric::Create("mlogloss", &lparam);
metric->Configure({});
ASSERT_STREQ(metric->Name(), "mlogloss");
EXPECT_ANY_THROW(GetMetricEval(metric, {0}, {0, 0}));
EXPECT_NEAR(GetMetricEval(
@@ -47,15 +46,27 @@ inline void TestMultiClassLogLoss(std::vector<Arg> args) {
}
TEST(Metric, DeclareUnifiedTest(MultiClassLogLoss)) {
TestMultiClassLogLoss({N_GPU()});
auto devices = xgboost::GPUSet::Range(0, NGPUS);
TestMultiClassLogLoss(devices);
}
#if defined(XGBOOST_USE_NCCL) && defined(__CUDACC__)
TEST(Metric, MGPU_MultiClassError) {
TestMultiClassError({Arg{"n_gpus", "-1"}});
TestMultiClassError({Arg{"n_gpus", "-1"}, Arg{"gpu_id", "1"}});
TestMultiClassLogLoss({Arg{"n_gpus", "-1"}});
TestMultiClassLogLoss({Arg{"n_gpus", "-1"}, Arg{"gpu_id", "1"}});
{
auto devices = xgboost::GPUSet::All(0, -1);
TestMultiClassError(devices);
}
{
auto devices = xgboost::GPUSet::All(1, -1);
TestMultiClassError(devices);
}
{
auto devices = xgboost::GPUSet::All(0, -1);
TestMultiClassLogLoss(devices);
}
{
auto devices = xgboost::GPUSet::All(1, -1);
TestMultiClassLogLoss(devices);
}
}
#endif // defined(XGBOOST_USE_NCCL)

View File

@@ -4,8 +4,9 @@
#include "../helpers.h"
TEST(Metric, AMS) {
EXPECT_ANY_THROW(xgboost::Metric::Create("ams"));
xgboost::Metric * metric = xgboost::Metric::Create("ams@0.5f");
auto tparam = xgboost::CreateEmptyGenericParam(0, 0);
EXPECT_ANY_THROW(xgboost::Metric::Create("ams", &tparam));
xgboost::Metric * metric = xgboost::Metric::Create("ams@0.5f", &tparam);
ASSERT_STREQ(metric->Name(), "ams@0.5");
EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}), 0.311f, 0.001f);
EXPECT_NEAR(GetMetricEval(metric,
@@ -14,7 +15,7 @@ TEST(Metric, AMS) {
0.29710f, 0.001f);
delete metric;
metric = xgboost::Metric::Create("ams@0");
metric = xgboost::Metric::Create("ams@0", &tparam);
ASSERT_STREQ(metric->Name(), "ams@0");
EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}), 0.311f, 0.001f);
@@ -22,7 +23,8 @@ TEST(Metric, AMS) {
}
TEST(Metric, AUC) {
xgboost::Metric * metric = xgboost::Metric::Create("auc");
auto tparam = xgboost::CreateEmptyGenericParam(0, 0);
xgboost::Metric * metric = xgboost::Metric::Create("auc", &tparam);
ASSERT_STREQ(metric->Name(), "auc");
EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}), 1, 1e-10);
EXPECT_NEAR(GetMetricEval(metric,
@@ -36,7 +38,8 @@ TEST(Metric, AUC) {
}
TEST(Metric, AUCPR) {
xgboost::Metric *metric = xgboost::Metric::Create("aucpr");
auto tparam = xgboost::CreateEmptyGenericParam(0, 0);
xgboost::Metric *metric = xgboost::Metric::Create("aucpr", &tparam);
ASSERT_STREQ(metric->Name(), "aucpr");
EXPECT_NEAR(GetMetricEval(metric, {0, 0, 1, 1}, {0, 0, 1, 1}), 1, 1e-10);
EXPECT_NEAR(GetMetricEval(metric, {0.1f, 0.9f, 0.1f, 0.9f}, {0, 0, 1, 1}),
@@ -62,7 +65,8 @@ TEST(Metric, Precision) {
// When the limit for precision is not given, it takes the limit at
// std::numeric_limits<unsigned>::max(); hence all values are very small
// NOTE(AbdealiJK): Maybe this should be fixed to be num_row by default.
xgboost::Metric * metric = xgboost::Metric::Create("pre");
auto tparam = xgboost::CreateEmptyGenericParam(0, 0);
xgboost::Metric * metric = xgboost::Metric::Create("pre", &tparam);
ASSERT_STREQ(metric->Name(), "pre");
EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}), 0, 1e-7);
EXPECT_NEAR(GetMetricEval(metric,
@@ -71,7 +75,7 @@ TEST(Metric, Precision) {
0, 1e-7);
delete metric;
metric = xgboost::Metric::Create("pre@2");
metric = xgboost::Metric::Create("pre@2", &tparam);
ASSERT_STREQ(metric->Name(), "pre@2");
EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}), 0.5f, 1e-7);
EXPECT_NEAR(GetMetricEval(metric,
@@ -85,7 +89,8 @@ TEST(Metric, Precision) {
}
TEST(Metric, NDCG) {
xgboost::Metric * metric = xgboost::Metric::Create("ndcg");
auto tparam = xgboost::CreateEmptyGenericParam(0, 0);
xgboost::Metric * metric = xgboost::Metric::Create("ndcg", &tparam);
ASSERT_STREQ(metric->Name(), "ndcg");
EXPECT_ANY_THROW(GetMetricEval(metric, {0, 1}, {}));
EXPECT_NEAR(GetMetricEval(metric,
@@ -98,7 +103,7 @@ TEST(Metric, NDCG) {
0.6509f, 0.001f);
delete metric;
metric = xgboost::Metric::Create("ndcg@2");
metric = xgboost::Metric::Create("ndcg@2", &tparam);
ASSERT_STREQ(metric->Name(), "ndcg@2");
EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}), 1, 1e-10);
EXPECT_NEAR(GetMetricEval(metric,
@@ -107,7 +112,7 @@ TEST(Metric, NDCG) {
0.3868f, 0.001f);
delete metric;
metric = xgboost::Metric::Create("ndcg@-");
metric = xgboost::Metric::Create("ndcg@-", &tparam);
ASSERT_STREQ(metric->Name(), "ndcg@-");
EXPECT_NEAR(GetMetricEval(metric,
xgboost::HostDeviceVector<xgboost::bst_float>{},
@@ -119,7 +124,7 @@ TEST(Metric, NDCG) {
0.6509f, 0.001f);
delete metric;
metric = xgboost::Metric::Create("ndcg@2-");
metric = xgboost::Metric::Create("ndcg@2-", &tparam);
ASSERT_STREQ(metric->Name(), "ndcg@2-");
EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}), 1, 1e-10);
EXPECT_NEAR(GetMetricEval(metric,
@@ -131,7 +136,8 @@ TEST(Metric, NDCG) {
}
TEST(Metric, MAP) {
xgboost::Metric * metric = xgboost::Metric::Create("map");
auto tparam = xgboost::CreateEmptyGenericParam(0, 0);
xgboost::Metric * metric = xgboost::Metric::Create("map", &tparam);
ASSERT_STREQ(metric->Name(), "map");
EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}), 1, 1e-10);
EXPECT_NEAR(GetMetricEval(metric,
@@ -143,14 +149,14 @@ TEST(Metric, MAP) {
std::vector<xgboost::bst_float>{}), 1, 1e-10);
delete metric;
metric = xgboost::Metric::Create("map@-");
metric = xgboost::Metric::Create("map@-", &tparam);
ASSERT_STREQ(metric->Name(), "map@-");
EXPECT_NEAR(GetMetricEval(metric,
xgboost::HostDeviceVector<xgboost::bst_float>{},
{}), 0, 1e-10);
delete metric;
metric = xgboost::Metric::Create("map@2");
metric = xgboost::Metric::Create("map@2", &tparam);
ASSERT_STREQ(metric->Name(), "map@2");
EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}), 1, 1e-10);
EXPECT_NEAR(GetMetricEval(metric,