Add Model and Configurable interface. (#4945)

* Apply Configurable to objective functions.
* Apply Model to Learner and Regtree, gbm.
* Add Load/SaveConfig to objs.
* Refactor obj tests to use smart pointer.
* Dummy methods for Save/Load Model.
This commit is contained in:
Jiaming Yuan
2019-10-18 01:56:02 -04:00
committed by GitHub
parent 9fc681001a
commit ae536756ae
31 changed files with 521 additions and 187 deletions

View File

@@ -1,10 +1,11 @@
/*!
* Copyright 2018 by Contributors
* Copyright 2018-2019 by Contributors
* \file hinge.cc
* \brief Provides an implementation of the hinge loss function
* \author Henry Gouk
*/
#include "xgboost/objective.h"
#include "xgboost/json.h"
#include "xgboost/span.h"
#include "xgboost/host_device_vector.h"
@@ -76,6 +77,12 @@ class HingeObj : public ObjFunction {
const char* DefaultEvalMetric() const override {
return "error";
}
void SaveConfig(Json* p_out) const override {
auto& out = *p_out;
out["name"] = String("binary:hinge");
}
void LoadConfig(Json const& in) override {}
};
// register the objective functions

View File

@@ -14,6 +14,7 @@
#include <limits>
#include <utility>
#include "xgboost/json.h"
#include "../common/common.h"
#include "../common/math.h"
#include "../common/transform.h"
@@ -25,7 +26,7 @@ namespace obj {
DMLC_REGISTRY_FILE_TAG(multiclass_obj_gpu);
#endif // defined(XGBOOST_USE_CUDA)
struct SoftmaxMultiClassParam : public dmlc::Parameter<SoftmaxMultiClassParam> {
struct SoftmaxMultiClassParam : public XGBoostParameter<SoftmaxMultiClassParam> {
int num_class;
// declare parameters
DMLC_DECLARE_PARAMETER(SoftmaxMultiClassParam) {
@@ -37,10 +38,10 @@ struct SoftmaxMultiClassParam : public dmlc::Parameter<SoftmaxMultiClassParam> {
class SoftmaxMultiClassObj : public ObjFunction {
public:
explicit SoftmaxMultiClassObj(bool output_prob)
: output_prob_(output_prob) {
}
void Configure(const std::vector<std::pair<std::string, std::string> >& args) override {
param_.InitAllowUnknown(args);
: output_prob_(output_prob) {}
void Configure(Args const& args) override {
param_.UpdateAllowUnknown(args);
}
void GetGradient(const HostDeviceVector<bst_float>& preds,
const MetaInfo& info,
@@ -155,6 +156,20 @@ class SoftmaxMultiClassObj : public ObjFunction {
}
}
// Serialise the objective name (which depends on whether probabilities
// are emitted) together with the softmax parameters.
void SaveConfig(Json* p_out) const override {
  auto& config = *p_out;
  config["name"] =
      String(this->output_prob_ ? "multi:softprob" : "multi:softmax");
  config["softmax_multiclass_param"] = toJson(param_);
}
// Restore the parameters written by SaveConfig().
void LoadConfig(Json const& in) override {
  fromJson(in["softmax_multiclass_param"], &param_);
}
private:
// output probability
bool output_prob_;

View File

@@ -6,6 +6,8 @@
#include <xgboost/objective.h>
#include <dmlc/registry.h>
#include <sstream>
#include "xgboost/host_device_vector.h"
namespace dmlc {
@@ -17,10 +19,12 @@ namespace xgboost {
ObjFunction* ObjFunction::Create(const std::string& name, GenericParameter const* tparam) {
auto *e = ::dmlc::Registry< ::xgboost::ObjFunctionReg>::Get()->Find(name);
if (e == nullptr) {
std::stringstream ss;
for (const auto& entry : ::dmlc::Registry< ::xgboost::ObjFunctionReg>::List()) {
LOG(INFO) << "Objective candidate: " << entry->name;
ss << "Objective candidate: " << entry->name << "\n";
}
LOG(FATAL) << "Unknown objective function " << name;
LOG(FATAL) << "Unknown objective function: `" << name << "`\n"
<< ss.str();
}
auto pobj = (e->body)();
pobj->tparam_ = tparam;

View File

@@ -10,6 +10,10 @@
#include <vector>
#include <algorithm>
#include <utility>
#include "xgboost/json.h"
#include "xgboost/parameter.h"
#include "../common/math.h"
#include "../common/random.h"
@@ -18,7 +22,7 @@ namespace obj {
DMLC_REGISTRY_FILE_TAG(rank_obj);
struct LambdaRankParam : public dmlc::Parameter<LambdaRankParam> {
struct LambdaRankParam : public XGBoostParameter<LambdaRankParam> {
int num_pairsample;
float fix_list_weight;
// declare parameters
@@ -35,7 +39,7 @@ struct LambdaRankParam : public dmlc::Parameter<LambdaRankParam> {
class LambdaRankObj : public ObjFunction {
public:
void Configure(const std::vector<std::pair<std::string, std::string> >& args) override {
param_.InitAllowUnknown(args);
param_.UpdateAllowUnknown(args);
}
void GetGradient(const HostDeviceVector<bst_float>& preds,
@@ -170,7 +174,16 @@ class LambdaRankObj : public ObjFunction {
virtual void GetLambdaWeight(const std::vector<ListEntry> &sorted_list,
std::vector<LambdaPair> *io_pairs) = 0;
private:
void SaveConfig(Json* p_out) const override {
auto& out = *p_out;
out["name"] = String("LambdaRankObj");
out["lambda_rank_param"] = Object();
for (auto const& kv : param_.__DICT__()) {
out["lambda_rank_param"][kv.first] = kv.second;
}
}
protected:
LambdaRankParam param_;
};
@@ -178,6 +191,15 @@ class PairwiseRankObj: public LambdaRankObj{
protected:
// Plain pairwise ranking applies no extra weighting to the sampled pairs.
void GetLambdaWeight(const std::vector<ListEntry>& sorted_list,
                     std::vector<LambdaPair>* io_pairs) override {}
// Record the registered objective name plus the shared ranking parameters.
void SaveConfig(Json* p_out) const override {
  (*p_out)["name"] = String("rank:pairwise");
  (*p_out)["lambda_rank_param"] = toJson(param_);
}
// Read back the parameters written by SaveConfig().
void LoadConfig(Json const& in) override {
  fromJson(in["lambda_rank_param"], &param_);
}
};
// beta version: NDCG lambda rank
@@ -228,6 +250,14 @@ class LambdaRankObjNDCG : public LambdaRankObj {
}
return static_cast<bst_float>(sumdcg);
}
// Record the registered objective name plus the shared ranking parameters.
void SaveConfig(Json* p_out) const override {
  (*p_out)["name"] = String("rank:ndcg");
  (*p_out)["lambda_rank_param"] = toJson(param_);
}
// Read back the parameters written by SaveConfig().
void LoadConfig(Json const& in) override {
  fromJson(in["lambda_rank_param"], &param_);
}
};
class LambdaRankObjMAP : public LambdaRankObj {
@@ -315,6 +345,15 @@ class LambdaRankObjMAP : public LambdaRankObj {
pair.neg_index, &map_stats);
}
}
// Record the registered objective name plus the shared ranking parameters.
void SaveConfig(Json* p_out) const override {
  (*p_out)["name"] = String("rank:map");
  (*p_out)["lambda_rank_param"] = toJson(param_);
}
// Read back the parameters written by SaveConfig().
void LoadConfig(Json const& in) override {
  fromJson(in["lambda_rank_param"], &param_);
}
};
// register the objective functions

View File

@@ -34,6 +34,8 @@ struct LinearSquareLoss {
static bst_float ProbToMargin(bst_float base_score) { return base_score; }
static const char* LabelErrorMsg() { return ""; }
static const char* DefaultEvalMetric() { return "rmse"; }
static const char* Name() { return "reg:squarederror"; }
};
struct SquaredLogError {
@@ -57,6 +59,8 @@ struct SquaredLogError {
return "label must be greater than -1 for rmsle so that log(label + 1) can be valid.";
}
static const char* DefaultEvalMetric() { return "rmsle"; }
static const char* Name() { return "reg:squaredlogerror"; }
};
// logistic loss for probability regression task
@@ -83,18 +87,21 @@ struct LogisticRegression {
}
static bst_float ProbToMargin(bst_float base_score) {
CHECK(base_score > 0.0f && base_score < 1.0f)
<< "base_score must be in (0,1) for logistic loss";
<< "base_score must be in (0,1) for logistic loss, got: " << base_score;
return -logf(1.0f / base_score - 1.0f);
}
static const char* LabelErrorMsg() {
return "label must be in [0,1] for logistic regression";
}
static const char* DefaultEvalMetric() { return "rmse"; }
static const char* Name() { return "reg:logistic"; }
};
// logistic loss for binary classification task
// Binary classification with logistic loss: same gradients as
// LogisticRegression, but scored with classification error by default.
struct LogisticClassification : public LogisticRegression {
  static const char* Name() { return "binary:logistic"; }
  static const char* DefaultEvalMetric() { return "error"; }
};
// logistic loss, but predict un-transformed margin
@@ -125,6 +132,8 @@ struct LogisticRaw : public LogisticRegression {
return std::max(predt * (T(1.0f) - predt), eps);
}
static const char* DefaultEvalMetric() { return "auc"; }
static const char* Name() { return "binary:logitraw"; }
};
} // namespace obj

View File

@@ -12,8 +12,10 @@
#include <memory>
#include <vector>
#include "xgboost/span.h"
#include "xgboost/host_device_vector.h"
#include "xgboost/json.h"
#include "xgboost/parameter.h"
#include "xgboost/span.h"
#include "../common/transform.h"
#include "../common/common.h"
@@ -27,7 +29,7 @@ namespace obj {
DMLC_REGISTRY_FILE_TAG(regression_obj_gpu);
#endif // defined(XGBOOST_USE_CUDA)
struct RegLossParam : public dmlc::Parameter<RegLossParam> {
struct RegLossParam : public XGBoostParameter<RegLossParam> {
float scale_pos_weight;
// declare parameters
DMLC_DECLARE_PARAMETER(RegLossParam) {
@@ -45,7 +47,7 @@ class RegLossObj : public ObjFunction {
RegLossObj() = default;
void Configure(const std::vector<std::pair<std::string, std::string> >& args) override {
param_.InitAllowUnknown(args);
param_.UpdateAllowUnknown(args);
}
void GetGradient(const HostDeviceVector<bst_float>& preds,
@@ -114,6 +116,16 @@ class RegLossObj : public ObjFunction {
return Loss::ProbToMargin(base_score);
}
// Persist the concrete loss name (supplied by the Loss policy class)
// together with the regression-loss parameters.
void SaveConfig(Json* p_out) const override {
  (*p_out)["name"] = String(Loss::Name());
  (*p_out)["reg_loss_param"] = toJson(param_);
}
// Reload the parameter block written by SaveConfig().
void LoadConfig(Json const& in) override {
  fromJson(in["reg_loss_param"], &param_);
}
protected:
RegLossParam param_;
};
@@ -121,23 +133,23 @@ class RegLossObj : public ObjFunction {
// register the objective functions
DMLC_REGISTER_PARAMETER(RegLossParam);
XGBOOST_REGISTER_OBJECTIVE(SquaredLossRegression, "reg:squarederror")
XGBOOST_REGISTER_OBJECTIVE(SquaredLossRegression, LinearSquareLoss::Name())
.describe("Regression with squared error.")
.set_body([]() { return new RegLossObj<LinearSquareLoss>(); });
XGBOOST_REGISTER_OBJECTIVE(SquareLogError, "reg:squaredlogerror")
XGBOOST_REGISTER_OBJECTIVE(SquareLogError, SquaredLogError::Name())
.describe("Regression with root mean squared logarithmic error.")
.set_body([]() { return new RegLossObj<SquaredLogError>(); });
XGBOOST_REGISTER_OBJECTIVE(LogisticRegression, "reg:logistic")
XGBOOST_REGISTER_OBJECTIVE(LogisticRegression, LogisticRegression::Name())
.describe("Logistic regression for probability regression task.")
.set_body([]() { return new RegLossObj<LogisticRegression>(); });
XGBOOST_REGISTER_OBJECTIVE(LogisticClassification, "binary:logistic")
XGBOOST_REGISTER_OBJECTIVE(LogisticClassification, LogisticClassification::Name())
.describe("Logistic regression for binary classification task.")
.set_body([]() { return new RegLossObj<LogisticClassification>(); });
XGBOOST_REGISTER_OBJECTIVE(LogisticRaw, "binary:logitraw")
XGBOOST_REGISTER_OBJECTIVE(LogisticRaw, LogisticRaw::Name())
.describe("Logistic regression for classification, output score "
"before logistic transformation.")
.set_body([]() { return new RegLossObj<LogisticRaw>(); });
@@ -151,7 +163,7 @@ XGBOOST_REGISTER_OBJECTIVE(LinearRegression, "reg:linear")
// End deprecated
// declare parameter
struct PoissonRegressionParam : public dmlc::Parameter<PoissonRegressionParam> {
struct PoissonRegressionParam : public XGBoostParameter<PoissonRegressionParam> {
float max_delta_step;
DMLC_DECLARE_PARAMETER(PoissonRegressionParam) {
DMLC_DECLARE_FIELD(max_delta_step).set_lower_bound(0.0f).set_default(0.7f)
@@ -165,7 +177,7 @@ class PoissonRegression : public ObjFunction {
public:
// declare functions
void Configure(const std::vector<std::pair<std::string, std::string> >& args) override {
param_.InitAllowUnknown(args);
param_.UpdateAllowUnknown(args);
}
void GetGradient(const HostDeviceVector<bst_float>& preds,
@@ -227,6 +239,16 @@ class PoissonRegression : public ObjFunction {
return "poisson-nloglik";
}
// Persist the objective name together with the Poisson parameters
// (max_delta_step).
void SaveConfig(Json* p_out) const override {
  (*p_out)["name"] = String("count:poisson");
  (*p_out)["poisson_regression_param"] = toJson(param_);
}
// Restore the parameter block saved by SaveConfig().
void LoadConfig(Json const& in) override {
  fromJson(in["poisson_regression_param"], &param_);
}
private:
PoissonRegressionParam param_;
HostDeviceVector<int> label_correct_;
@@ -321,6 +343,12 @@ class CoxRegression : public ObjFunction {
// Cox regression is evaluated with the negative partial log-likelihood.
const char* DefaultEvalMetric() const override { return "cox-nloglik"; }
// Only the registered name needs saving: no tunable parameters here.
void SaveConfig(Json* p_out) const override {
  (*p_out)["name"] = String("survival:cox");
}
// Stateless objective: nothing to restore.
void LoadConfig(Json const&) override {}
};
// register the objective function
@@ -391,6 +419,11 @@ class GammaRegression : public ObjFunction {
// Gamma deviance is the default metric for gamma regression.
const char* DefaultEvalMetric() const override { return "gamma-nloglik"; }
// Only the registered name needs saving: no tunable parameters here.
void SaveConfig(Json* p_out) const override {
  (*p_out)["name"] = String("reg:gamma");
}
// Stateless objective: nothing to restore.
void LoadConfig(Json const&) override {}
private:
HostDeviceVector<int> label_correct_;
@@ -403,7 +436,7 @@ XGBOOST_REGISTER_OBJECTIVE(GammaRegression, "reg:gamma")
// declare parameter
struct TweedieRegressionParam : public dmlc::Parameter<TweedieRegressionParam> {
struct TweedieRegressionParam : public XGBoostParameter<TweedieRegressionParam> {
float tweedie_variance_power;
DMLC_DECLARE_PARAMETER(TweedieRegressionParam) {
DMLC_DECLARE_FIELD(tweedie_variance_power).set_range(1.0f, 2.0f).set_default(1.5f)
@@ -416,7 +449,7 @@ class TweedieRegression : public ObjFunction {
public:
// declare functions
void Configure(const std::vector<std::pair<std::string, std::string> >& args) override {
param_.InitAllowUnknown(args);
param_.UpdateAllowUnknown(args);
std::ostringstream os;
os << "tweedie-nloglik@" << param_.tweedie_variance_power;
metric_ = os.str();
@@ -485,6 +518,15 @@ class TweedieRegression : public ObjFunction {
return metric_.c_str();
}
// Persist the objective name together with tweedie_variance_power.
void SaveConfig(Json* p_out) const override {
  auto& out = *p_out;
  out["name"] = String("reg:tweedie");
  out["tweedie_regression_param"] = toJson(param_);
}
// Restore the parameters saved by SaveConfig().
void LoadConfig(Json const& in) override {
  fromJson(in["tweedie_regression_param"], &param_);
  // Rebuild the cached metric name; Configure() is the only other place
  // that does this, and it is not invoked on the model-loading path, which
  // would otherwise leave DefaultEvalMetric() returning an empty string.
  std::ostringstream os;
  os << "tweedie-nloglik@" << param_.tweedie_variance_power;
  metric_ = os.str();
}
private:
std::string metric_;
TweedieRegressionParam param_;