De-duplicate GPU parameters. (#4454)

* Only define `gpu_id` and `n_gpus` in `LearnerTrainParam`
* Pass `LearnerTrainParam` through XGBoost via factory methods.
* Disable all GPU usage when GPU-related parameters are not specified (fixes XGBoost choosing the GPU too aggressively).
* Test `LearnerTrainParam` I/O.
* Fix GPU pickling.
This commit is contained in:
Jiaming Yuan
2019-05-29 11:55:57 +08:00
committed by GitHub
parent a3fedbeaa8
commit c589eff941
69 changed files with 927 additions and 562 deletions

View File

@@ -314,11 +314,6 @@ struct EvalEWiseBase : public Metric {
explicit EvalEWiseBase(char const* policy_param) :
policy_{policy_param}, reducer_{policy_} {}
void Configure(
const std::vector<std::pair<std::string, std::string> >& args) override {
param_.InitAllowUnknown(args);
}
bst_float Eval(const HostDeviceVector<bst_float>& preds,
const MetaInfo& info,
bool distributed) override {
@@ -328,7 +323,7 @@ struct EvalEWiseBase : public Metric {
<< "hint: use merror or mlogloss for multi-class classification";
const auto ndata = static_cast<omp_ulong>(info.labels_.Size());
// Dealing with ndata < n_gpus.
GPUSet devices = GPUSet::All(param_.gpu_id, param_.n_gpus, ndata);
GPUSet devices = GPUSet::All(tparam_->gpu_id, tparam_->n_gpus, ndata);
auto result =
reducer_.Reduce(devices, info.weights_, info.labels_, preds);
@@ -347,8 +342,6 @@ struct EvalEWiseBase : public Metric {
private:
Policy policy_;
MetricParam param_;
ElementWiseMetricsReduction<Policy> reducer_;
};

View File

@@ -1,19 +1,18 @@
/*!
* Copyright 2015 by Contributors
* Copyright 2015-2019 by Contributors
* \file metric_registry.cc
* \brief Registry of metric functions.
*/
#include <xgboost/metric.h>
#include <dmlc/registry.h>
#include "metric_common.h"
#include <xgboost/metric.h>
#include <xgboost/generic_parameters.h>
namespace dmlc {
DMLC_REGISTRY_ENABLE(::xgboost::MetricReg);
}
namespace xgboost {
Metric* Metric::Create(const std::string& name) {
Metric* Metric::Create(const std::string& name, LearnerTrainParam const* tparam) {
std::string buf = name;
std::string prefix = name;
auto pos = buf.find('@');
@@ -22,21 +21,24 @@ Metric* Metric::Create(const std::string& name) {
if (e == nullptr) {
LOG(FATAL) << "Unknown metric function " << name;
}
return (e->body)(nullptr);
auto p_metric = (e->body)(nullptr);
p_metric->tparam_ = tparam;
return p_metric;
} else {
std::string prefix = buf.substr(0, pos);
auto *e = ::dmlc::Registry< ::xgboost::MetricReg>::Get()->Find(prefix.c_str());
if (e == nullptr) {
LOG(FATAL) << "Unknown metric function " << name;
}
return (e->body)(buf.substr(pos + 1, buf.length()).c_str());
auto p_metric = (e->body)(buf.substr(pos + 1, buf.length()).c_str());
p_metric->tparam_ = tparam;
return p_metric;
}
}
} // namespace xgboost
namespace xgboost {
namespace metric {
DMLC_REGISTER_PARAMETER(MetricParam);
// List of files that will be force linked in static links.
DMLC_REGISTRY_LINK_TAG(elementwise_metric);

View File

@@ -11,20 +11,6 @@
namespace xgboost {
namespace metric {
// Created exclusively for GPU.
// dmlc parameter struct holding the GPU selection settings used by metrics:
// how many GPUs to use and which GPU to use.
struct MetricParam : public dmlc::Parameter<MetricParam> {
// Number of GPUs to use for multi-gpu algorithms; defaults to 1 and is
// bounded below by GPUSet::kAll.
int n_gpus;
// GPU to use for evaluation; defaults to 0 and may not be negative.
int gpu_id;
// Declares both fields to dmlc together with their defaults, lower bounds and
// help text.
DMLC_DECLARE_PARAMETER(MetricParam) {
DMLC_DECLARE_FIELD(n_gpus).set_default(1).set_lower_bound(GPUSet::kAll)
.describe("Number of GPUs to use for multi-gpu algorithms.");
DMLC_DECLARE_FIELD(gpu_id)
.set_lower_bound(0)
.set_default(0)
.describe("gpu to use for objective function evaluation");
};
};
class PackedReduceResult {
double residue_sum_;
double weights_sum_;

View File

@@ -168,11 +168,6 @@ class MultiClassMetricsReduction {
*/
template<typename Derived>
struct EvalMClassBase : public Metric {
void Configure(
const std::vector<std::pair<std::string, std::string> >& args) override {
param_.InitAllowUnknown(args);
}
bst_float Eval(const HostDeviceVector<bst_float> &preds,
const MetaInfo &info,
bool distributed) override {
@@ -185,7 +180,7 @@ struct EvalMClassBase : public Metric {
<< " use logloss for binary classification";
const auto ndata = static_cast<bst_omp_uint>(info.labels_.Size());
GPUSet devices = GPUSet::All(param_.gpu_id, param_.n_gpus, ndata);
GPUSet devices = GPUSet::All(tparam_->gpu_id, tparam_->n_gpus, ndata);
auto result = reducer_.Reduce(devices, nclass, info.weights_, info.labels_, preds);
double dat[2] { result.Residue(), result.Weights() };
@@ -215,7 +210,6 @@ struct EvalMClassBase : public Metric {
private:
MultiClassMetricsReduction<Derived> reducer_;
MetricParam param_;
// used to store error message
const char *error_msg_;
};