Implement slope for Pseudo-Huber. (#7727)

* Add objective and metric.
* Some refactoring for CPU/GPU dispatching using linalg module.
This commit is contained in:
Jiaming Yuan
2022-03-14 21:42:38 +08:00
committed by GitHub
parent 4dafb5fac8
commit 98d6faefd6
28 changed files with 456 additions and 290 deletions

View File

@@ -105,38 +105,6 @@ struct LogisticRegression {
static ObjInfo Info() { return {ObjInfo::kRegression, false}; }
};
/*!
 * \brief Loss policy implementing the pseudo-Huber error.
 *
 * With z = predt - label the gradient is z / sqrt(1 + z^2) and the hessian is
 * 1 / ((1 + z^2) * sqrt(1 + z^2)).
 */
struct PseudoHuberError {
  /*! \brief Identity transform: the raw prediction is returned unchanged. */
  XGBOOST_DEVICE static bst_float PredTransform(bst_float x) { return x; }

  /*! \brief Every label value is accepted. */
  XGBOOST_DEVICE static bool CheckLabel(bst_float) { return true; }

  /*! \brief First derivative of the loss with respect to the prediction. */
  XGBOOST_DEVICE static bst_float FirstOrderGradient(bst_float predt, bst_float label) {
    const float residual = predt - label;
    const float norm = std::sqrt(1 + std::pow(residual, 2));
    return residual / norm;
  }

  /*! \brief Second derivative of the loss with respect to the prediction. */
  XGBOOST_DEVICE static bst_float SecondOrderGradient(bst_float predt, bst_float label) {
    const float one_plus_sq = 1 + std::pow(predt - label, 2);
    const float root = std::sqrt(one_plus_sq);
    return 1 / (one_plus_sq * root);
  }

  /*! \brief The base score is already a margin; it is passed through as-is. */
  static bst_float ProbToMargin(bst_float base_score) { return base_score; }

  /*! \brief No label restriction exists, hence an empty message. */
  static const char* LabelErrorMsg() { return ""; }

  /*! \brief Name of the metric used when the user does not pick one. */
  static const char* DefaultEvalMetric() { return "mphe"; }

  /*! \brief Name under which this objective is registered. */
  static const char* Name() { return "reg:pseudohubererror"; }

  /*! \brief Task descriptor returned for this loss. */
  static ObjInfo Info() { return {ObjInfo::kRegression, false}; }
};
// logistic loss for binary classification task
struct LogisticClassification : public LogisticRegression {
static const char* DefaultEvalMetric() { return "logloss"; }

View File

@@ -8,23 +8,38 @@
#include <dmlc/omp.h>
#include <xgboost/logging.h>
#include <xgboost/objective.h>
#include <cmath>
#include <memory>
#include <vector>
#include "../common/common.h"
#include "../common/linalg_op.h"
#include "../common/pseudo_huber.h"
#include "../common/threading_utils.h"
#include "../common/transform.h"
#include "./regression_loss.h"
#include "xgboost/host_device_vector.h"
#include "xgboost/json.h"
#include "xgboost/parameter.h"
#include "xgboost/span.h"
#include "../common/transform.h"
#include "../common/common.h"
#include "../common/threading_utils.h"
#include "./regression_loss.h"
#if defined(XGBOOST_USE_CUDA)
#include "../common/linalg_op.cuh"
#endif // defined(XGBOOST_USE_CUDA)
namespace xgboost {
namespace obj {
namespace {
/*!
 * \brief Sanity-check the sizes of labels, predictions and weights.
 *
 * Fails a CHECK when the first label dimension differs from the row count, when
 * the total number of labels differs from the number of predictions, or when
 * weights are present but their count differs from the row count.
 *
 * \param info  Meta info holding labels, weights and the row count.
 * \param preds Predictions to compare against the labels.
 */
void CheckRegInputs(MetaInfo const& info, HostDeviceVector<bst_float> const& preds) {
  CHECK_EQ(info.labels.Shape(0), info.num_row_) << "Invalid shape of labels.";
  CHECK_EQ(info.labels.Size(), preds.Size()) << "Invalid shape of labels.";

  if (info.weights_.Empty()) {
    // No weights supplied, nothing further to validate.
    return;
  }
  CHECK_EQ(info.weights_.Size(), info.num_row_)
      << "Number of weights should be equal to number of data points.";
}
} // anonymous namespace
#if defined(XGBOOST_USE_CUDA)
DMLC_REGISTRY_FILE_TAG(regression_obj_gpu);
@@ -64,20 +79,13 @@ class RegLossObj : public ObjFunction {
void GetGradient(const HostDeviceVector<bst_float>& preds,
const MetaInfo &info, int,
HostDeviceVector<GradientPair>* out_gpair) override {
CHECK_EQ(preds.Size(), info.labels.Size())
<< " " << "labels are not correctly provided"
<< "preds.size=" << preds.Size() << ", label.size=" << info.labels.Size() << ", "
<< "Loss: " << Loss::Name();
CheckRegInputs(info, preds);
size_t const ndata = preds.Size();
out_gpair->Resize(ndata);
auto device = ctx_->gpu_id;
additional_input_.HostVector().begin()[0] = 1; // Fill the label_correct flag
bool is_null_weight = info.weights_.Size() == 0;
if (!is_null_weight) {
CHECK_EQ(info.weights_.Size(), info.labels.Shape(0))
<< "Number of weights should be equal to number of data points.";
}
auto scale_pos_weight = param_.scale_pos_weight;
additional_input_.HostVector().begin()[1] = scale_pos_weight;
additional_input_.HostVector().begin()[2] = is_null_weight;
@@ -179,10 +187,6 @@ XGBOOST_REGISTER_OBJECTIVE(LogisticRegression, LogisticRegression::Name())
.describe("Logistic regression for probability regression task.")
.set_body([]() { return new RegLossObj<LogisticRegression>(); });
// Register the pseudo-Huber loss under the string returned by PseudoHuberError::Name();
// the factory lambda creates a RegLossObj instantiated with that loss policy.
XGBOOST_REGISTER_OBJECTIVE(PseudoHuberError, PseudoHuberError::Name())
.describe("Regression Pseudo Huber error.")
.set_body([]() { return new RegLossObj<PseudoHuberError>(); });
// Register the binary-classification logistic loss under LogisticClassification::Name();
// the factory lambda creates a RegLossObj instantiated with that loss policy.
XGBOOST_REGISTER_OBJECTIVE(LogisticClassification, LogisticClassification::Name())
.describe("Logistic regression for binary classification task.")
.set_body([]() { return new RegLossObj<LogisticClassification>(); });
@@ -200,6 +204,70 @@ XGBOOST_REGISTER_OBJECTIVE(LinearRegression, "reg:linear")
return new RegLossObj<LinearSquareLoss>(); });
// End deprecated
class PseudoHuberRegression : public ObjFunction {
PesudoHuberParam param_;
public:
void Configure(Args const& args) override { param_.UpdateAllowUnknown(args); }
struct ObjInfo Task() const override { return {ObjInfo::kRegression, false}; }
uint32_t Targets(MetaInfo const& info) const override {
return std::max(static_cast<size_t>(1), info.labels.Shape(1));
}
void GetGradient(HostDeviceVector<bst_float> const& preds, const MetaInfo& info, int iter,
HostDeviceVector<GradientPair>* out_gpair) override {
CheckRegInputs(info, preds);
auto slope = param_.huber_slope;
CHECK_NE(slope, 0.0) << "slope for pseudo huber cannot be 0.";
auto labels = info.labels.View(ctx_->gpu_id);
out_gpair->SetDevice(ctx_->gpu_id);
out_gpair->Resize(info.labels.Size());
auto gpair = linalg::MakeVec(out_gpair);
preds.SetDevice(ctx_->gpu_id);
auto predt = linalg::MakeVec(&preds);
info.weights_.SetDevice(ctx_->gpu_id);
common::OptionalWeights weight{ctx_->IsCPU() ? info.weights_.ConstHostSpan()
: info.weights_.ConstDeviceSpan()};
linalg::ElementWiseKernel(ctx_, labels, [=] XGBOOST_DEVICE(size_t i, float const y) mutable {
auto sample_id = std::get<0>(linalg::UnravelIndex(i, labels.Shape()));
const float z = predt(i) - y;
const float scale_sqrt = std::sqrt(1 + common::Sqr(z) / common::Sqr(slope));
float grad = z / scale_sqrt;
auto scale = common::Sqr(slope) + common::Sqr(z);
float hess = common::Sqr(slope) / (scale * scale_sqrt);
auto w = weight[sample_id];
gpair(i) = {grad * w, hess * w};
});
}
const char* DefaultEvalMetric() const override { return "mphe"; }
void SaveConfig(Json* p_out) const override {
auto& out = *p_out;
out["name"] = String("reg:pseudohubererror");
out["pseduo_huber_param"] = ToJson(param_);
}
void LoadConfig(Json const& in) override {
auto const& config = get<Object const>(in);
if (config.find("pseduo_huber_param") == config.cend()) {
// The parameter is added in 1.6.
return;
}
FromJson(in["pseduo_huber_param"], &param_);
}
};
// Register PseudoHuberRegression under the objective name "reg:pseudohubererror";
// the factory lambda returns a newly allocated instance.
XGBOOST_REGISTER_OBJECTIVE(PseudoHuberRegression, "reg:pseudohubererror")
.describe("Regression Pseudo Huber error.")
.set_body([]() { return new PseudoHuberRegression(); });
// declare parameter
struct PoissonRegressionParam : public XGBoostParameter<PoissonRegressionParam> {
float max_delta_step;