Support adaptive trees, a feature available in both sklearn and LightGBM. After a tree is constructed, each leaf value is recomputed from the residuals between labels and predictions. For l1 error, the optimal leaf value is the median (50th percentile) of those residuals; a rough sketch of the recomputation follows this paragraph. This is marked as experimental support for the following reasons:

- The value is not well defined for distributed training, where a leaf may be empty on some local workers. Right now I just use the original leaf value when computing the average with the other workers, which might cause significant errors.
- Some follow-ups are required: support for the exact tree method and the pruner, optimization of the quantile function, and calculation of the initial estimation.
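As a minimal sketch of the leaf recomputation for l1 error (not the actual patch; `OptimalL1Leaf` and its fallback argument are illustrative, not xgboost APIs):

```cpp
// Sketch: recompute a leaf value for l1 (absolute) error after the tree is
// built.  For l1 loss, the minimizer of sum_i |r_i - v| over the leaf's
// residuals r_i = label_i - prediction_i is their median.
#include <algorithm>
#include <vector>

float OptimalL1Leaf(std::vector<float> residuals, float original_leaf_value) {
  if (residuals.empty()) {
    // Empty leaf on a local worker: the value is not well defined, so fall
    // back to the original leaf value (the source of error described above).
    return original_leaf_value;
  }
  auto mid = residuals.begin() + residuals.size() / 2;
  std::nth_element(residuals.begin(), mid, residuals.end());
  return *mid;  // 50th percentile (upper median for even-sized leaves)
}
```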
/*!
 * Copyright 2015-2022 by Contributors
 * \file custom_metric.cc
 * \brief This is an example of defining a plugin for xgboost.
 *  This plugin defines an additional objective function.
 */
#include <xgboost/base.h>
#include <xgboost/parameter.h>
#include <xgboost/objective.h>
#include <xgboost/json.h>

#include <cmath>

namespace xgboost {
namespace obj {

// This is a helpful data structure to define parameters.
// You do not have to use it.
// See http://dmlc-core.readthedocs.org/en/latest/parameter.html
// for an introduction to this module.
struct MyLogisticParam : public XGBoostParameter<MyLogisticParam> {
  float scale_neg_weight;
  // declare parameters
  DMLC_DECLARE_PARAMETER(MyLogisticParam) {
    DMLC_DECLARE_FIELD(scale_neg_weight).set_default(1.0f).set_lower_bound(0.0f)
        .describe("Scale the weight of negative examples by this factor");
  }
};

DMLC_REGISTER_PARAMETER(MyLogisticParam);

// Define a customized logistic regression objective in C++.
// Implement the interface.
class MyLogistic : public ObjFunction {
 public:
  void Configure(const Args& args) override { param_.UpdateAllowUnknown(args); }

  ObjInfo Task() const override { return ObjInfo::kRegression; }

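  // For the logistic loss l(y, m) = -[y log p + (1 - y) log(1 - p)] with
  // p = 1 / (1 + exp(-m)), the derivatives with respect to the margin m are
  //   dl/dm     = p - y          (first order, "grad" below)
  //   d^2l/dm^2 = p * (1 - p)    (second order, "hess" below)
  // which is what GetGradient computes for every row.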
  void GetGradient(const HostDeviceVector<bst_float> &preds,
                   const MetaInfo &info,
                   int iter,
                   HostDeviceVector<GradientPair> *out_gpair) override {
    out_gpair->Resize(preds.Size());
    const std::vector<bst_float>& preds_h = preds.HostVector();
    std::vector<GradientPair>& out_gpair_h = out_gpair->HostVector();
    auto const labels_h = info.labels.HostView();
    for (size_t i = 0; i < preds_h.size(); ++i) {
      bst_float w = info.GetWeight(i);
      // scale the negative examples!
      if (labels_h(i) == 0.0f) w *= param_.scale_neg_weight;
      // logistic transformation
      bst_float p = 1.0f / (1.0f + std::exp(-preds_h[i]));
      // this is the gradient
      bst_float grad = (p - labels_h(i)) * w;
      // this is the second order gradient
      bst_float hess = p * (1.0f - p) * w;
      out_gpair_h.at(i) = GradientPair(grad, hess);
    }
  }

  const char* DefaultEvalMetric() const override {
    return "logloss";
  }

  void PredTransform(HostDeviceVector<bst_float> *io_preds) const override {
    // transform margin value to probability.
    std::vector<bst_float> &preds = io_preds->HostVector();
    for (auto& pred : preds) {
      pred = 1.0f / (1.0f + std::exp(-pred));
    }
  }

  bst_float ProbToMargin(bst_float base_score) const override {
    // transform probability to margin value
    return -std::log(1.0f / base_score - 1.0f);
  }

  void SaveConfig(Json* p_out) const override {
    auto& out = *p_out;
    out["name"] = String("my_logistic");
    out["my_logistic_param"] = ToJson(param_);
  }

  void LoadConfig(Json const& in) override {
    FromJson(in["my_logistic_param"], &param_);
  }

 private:
  MyLogisticParam param_;
};

// Finally, register the objective function.
// After it succeeds you can try xgboost with objective=mylogistic.
XGBOOST_REGISTER_OBJECTIVE(MyLogistic, "mylogistic")
    .describe("User defined logistic regression plugin")
    .set_body([]() { return new MyLogistic(); });

}  // namespace obj
}  // namespace xgboost
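A minimal sketch of trying the registered objective through the XGBoost C API, assuming the plugin is compiled into the xgboost library; the training file `train.libsvm` and the parameter values are placeholders for illustration:

```cpp
#include <xgboost/c_api.h>

int main() {
  DMatrixHandle dtrain;
  // "train.libsvm" is a placeholder training file.
  XGDMatrixCreateFromFile("train.libsvm", 1, &dtrain);

  BoosterHandle booster;
  XGBoosterCreate(&dtrain, 1, &booster);
  // Select the objective registered above and set its declared parameter.
  XGBoosterSetParam(booster, "objective", "mylogistic");
  XGBoosterSetParam(booster, "scale_neg_weight", "2.0");

  // Run a few boosting rounds.
  for (int iter = 0; iter < 10; ++iter) {
    XGBoosterUpdateOneIter(booster, iter, dtrain);
  }

  XGBoosterFree(booster);
  XGDMatrixFree(dtrain);
  return 0;
}
```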