Calculate base_score based on input labels for mae. (#8107)
Fit an intercept as base score for abs loss.
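Background for the change, as a rough sketch rather than the code in this commit: for absolute error the constant prediction minimizing the training loss is a median of the labels, so the learner can fit a data-dependent intercept instead of always starting from the fixed 0.5 default. A minimal, self-contained C++ illustration assuming plain unweighted labels; the actual implementation goes through ObjFunction::InitEstimation (see the diff below), and the MedianBaseScore helper here is hypothetical:

#include <algorithm>
#include <vector>

// A minimizer of sum_i |y_i - b| over a constant b is a median of y.
// Returns the upper median for even-sized inputs, which is fine for a sketch.
float MedianBaseScore(std::vector<float> labels) {
  auto mid = labels.begin() + labels.size() / 2;
  std::nth_element(labels.begin(), mid, labels.end());  // O(n) on average
  return *mid;
}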
src/learner.cc | 344
@@ -4,47 +4,48 @@
  * \brief Implementation of learning algorithm.
  * \author Tianqi Chen
  */
 #include "xgboost/learner.h"

-#include <dmlc/any.h>
 #include <dmlc/io.h>
 #include <dmlc/parameter.h>
 #include <dmlc/thread_local.h>

-#include <atomic>
-#include <mutex>
 #include <algorithm>
+#include <atomic>
 #include <iomanip>
-#include <limits>
+#include <limits>  // std::numeric_limits
 #include <memory>
+#include <mutex>
 #include <sstream>
-#include <string>
+#include <stack>
+#include <string>
 #include <utility>
 #include <vector>

-#include "dmlc/any.h"
+#include "common/charconv.h"
+#include "common/common.h"
+#include "common/io.h"
+#include "common/linalg_op.h"
+#include "common/observer.h"
+#include "common/random.h"
+#include "common/threading_utils.h"
+#include "common/timer.h"
+#include "common/version.h"
 #include "xgboost/base.h"
 #include "xgboost/c_api.h"
 #include "xgboost/data.h"
-#include "xgboost/model.h"
-#include "xgboost/predictor.h"
 #include "xgboost/feature_map.h"
 #include "xgboost/gbm.h"
 #include "xgboost/generic_parameters.h"
 #include "xgboost/host_device_vector.h"
 #include "xgboost/json.h"
 #include "xgboost/learner.h"
 #include "xgboost/logging.h"
 #include "xgboost/metric.h"
+#include "xgboost/model.h"
 #include "xgboost/objective.h"
 #include "xgboost/parameter.h"
-
-#include "common/common.h"
-#include "common/io.h"
-#include "common/observer.h"
-#include "common/random.h"
-#include "common/timer.h"
-#include "common/charconv.h"
-#include "common/version.h"
-#include "common/threading_utils.h"
+#include "xgboost/predictor.h"

 namespace {
@@ -85,26 +86,29 @@ struct LearnerModelParamLegacy : public dmlc::Parameter<LearnerModelParamLegacy>
   uint32_t minor_version;

   uint32_t num_target{1};

+  int32_t base_score_estimated{0};
   /*! \brief reserved field */
-  int reserved[26];
+  int reserved[25];
   /*! \brief constructor */
   LearnerModelParamLegacy() {
     std::memset(this, 0, sizeof(LearnerModelParamLegacy));
-    base_score = 0.5f;
+    base_score = ObjFunction::DefaultBaseScore();
     num_target = 1;
     major_version = std::get<0>(Version::Self());
     minor_version = std::get<1>(Version::Self());
+    base_score_estimated = 0;
     static_assert(sizeof(LearnerModelParamLegacy) == 136,
                   "Do not change the size of this struct, as it will break binary IO.");
   }
   // Skip other legacy fields.
   Json ToJson() const {
     Object obj;
     char floats[NumericLimits<float>::kToCharsSize];
     auto ret = to_chars(floats, floats + NumericLimits<float>::kToCharsSize, base_score);
-    CHECK(ret.ec == std::errc());
-    obj["base_score"] =
-        std::string{floats, static_cast<size_t>(std::distance(floats, ret.ptr))};
+    CHECK(ret.ec == std::errc{});
+    obj["base_score"] = std::string{floats, static_cast<size_t>(std::distance(floats, ret.ptr))};

     char integers[NumericLimits<int64_t>::kToCharsSize];
     ret = to_chars(integers, integers + NumericLimits<int64_t>::kToCharsSize,
@@ -136,10 +140,14 @@ struct LearnerModelParamLegacy : public dmlc::Parameter<LearnerModelParamLegacy>
     }

     this->Init(m);

     std::string str = get<String const>(j_param.at("base_score"));
     from_chars(str.c_str(), str.c_str() + str.size(), base_score);
+    // It can only be estimated during the first training, we consider it estimated afterward
+    base_score_estimated = 1;
   }
-  inline LearnerModelParamLegacy ByteSwap() const {
+
+  LearnerModelParamLegacy ByteSwap() const {
     LearnerModelParamLegacy x = *this;
     dmlc::ByteSwap(&x.base_score, sizeof(x.base_score), 1);
     dmlc::ByteSwap(&x.num_feature, sizeof(x.num_feature), 1);
@@ -149,14 +157,30 @@ struct LearnerModelParamLegacy : public dmlc::Parameter<LearnerModelParamLegacy>
     dmlc::ByteSwap(&x.major_version, sizeof(x.major_version), 1);
     dmlc::ByteSwap(&x.minor_version, sizeof(x.minor_version), 1);
     dmlc::ByteSwap(&x.num_target, sizeof(x.num_target), 1);
+    dmlc::ByteSwap(&x.base_score_estimated, sizeof(x.base_score_estimated), 1);
     dmlc::ByteSwap(x.reserved, sizeof(x.reserved[0]), sizeof(x.reserved) / sizeof(x.reserved[0]));
     return x;
   }

+  template <typename Container>
+  Args UpdateAllowUnknown(Container const& kwargs) {
+    // Detect whether user has made their own base score.
+    if (std::find_if(kwargs.cbegin(), kwargs.cend(),
+                     [](auto const& kv) { return kv.first == "base_score"; }) != kwargs.cend()) {
+      base_score_estimated = true;
+    }
+    if (std::find_if(kwargs.cbegin(), kwargs.cend(), [](auto const& kv) {
+          return kv.first == "base_score_estimated";
+        }) != kwargs.cend()) {
+      LOG(FATAL) << "`base_score_estimated` cannot be specified as hyper-parameter.";
+    }
+    return dmlc::Parameter<LearnerModelParamLegacy>::UpdateAllowUnknown(kwargs);
+  }
+
   // declare parameters
   DMLC_DECLARE_PARAMETER(LearnerModelParamLegacy) {
     DMLC_DECLARE_FIELD(base_score)
-        .set_default(0.5f)
+        .set_default(ObjFunction::DefaultBaseScore())
         .describe("Global bias of the model.");
     DMLC_DECLARE_FIELD(num_feature)
         .set_default(0)
@@ -170,12 +194,12 @@ struct LearnerModelParamLegacy : public dmlc::Parameter<LearnerModelParamLegacy>
         .set_default(1)
         .set_lower_bound(1)
         .describe("Number of target for multi-target regression.");
+    DMLC_DECLARE_FIELD(base_score_estimated).set_default(0);
   }
 };

-LearnerModelParam::LearnerModelParam(LearnerModelParamLegacy const& user_param, float base_margin,
-                                     ObjInfo t)
-    : base_score{base_margin}, num_feature{user_param.num_feature}, task{t} {
+LearnerModelParam::LearnerModelParam(LearnerModelParamLegacy const& user_param, ObjInfo t)
+    : num_feature{user_param.num_feature}, task{t} {
   auto n_classes = std::max(static_cast<uint32_t>(user_param.num_class), 1u);
   auto n_targets = user_param.num_target;
   num_output_group = std::max(n_classes, n_targets);
@@ -185,6 +209,53 @@ LearnerModelParam::LearnerModelParam(LearnerModelParamLegacy const& user_param,
       << ", n_targets:" << n_targets;
 }

+LearnerModelParam::LearnerModelParam(Context const* ctx, LearnerModelParamLegacy const& user_param,
+                                     linalg::Tensor<float, 1> base_margin, ObjInfo t)
+    : LearnerModelParam{user_param, t} {
+  std::swap(base_score_, base_margin);
+  // Make sure read access everywhere for thread-safe prediction.
+  common::AsConst(base_score_).HostView();
+  if (!ctx->IsCPU()) {
+    common::AsConst(base_score_).View(ctx->gpu_id);
+  }
+  CHECK(common::AsConst(base_score_).Data()->HostCanRead());
+}
+
+linalg::TensorView<float const, 1> LearnerModelParam::BaseScore(int32_t device) const {
+  // multi-class is not yet supported.
+  CHECK_EQ(base_score_.Size(), 1);
+  if (device == Context::kCpuId) {
+    // Make sure that we won't run into race condition.
+    CHECK(base_score_.Data()->HostCanRead());
+    return base_score_.HostView();
+  }
+  // Make sure that we won't run into race condition.
+  CHECK(base_score_.Data()->DeviceCanRead());
+  auto v = base_score_.View(device);
+  CHECK(base_score_.Data()->HostCanRead());  // make sure read access is not removed.
+  return v;
+}
+
+linalg::TensorView<float const, 1> LearnerModelParam::BaseScore(Context const* ctx) const {
+  return this->BaseScore(ctx->gpu_id);
+}
+
+void LearnerModelParam::Copy(LearnerModelParam const& that) {
+  base_score_.Reshape(that.base_score_.Shape());
+  base_score_.Data()->SetDevice(that.base_score_.DeviceIdx());
+  base_score_.Data()->Copy(*that.base_score_.Data());
+  common::AsConst(base_score_).HostView();
+  if (that.base_score_.DeviceIdx() != Context::kCpuId) {
+    common::AsConst(base_score_).View(that.base_score_.DeviceIdx());
+  }
+  CHECK_EQ(base_score_.Data()->DeviceCanRead(), that.base_score_.Data()->DeviceCanRead());
+  CHECK(base_score_.Data()->HostCanRead());
+
+  num_feature = that.num_feature;
+  num_output_group = that.num_output_group;
+  task = that.task;
+}
+
 struct LearnerTrainParam : public XGBoostParameter<LearnerTrainParam> {
   // data split mode, can be row, col, or none.
   DataSplitMode dsplit {DataSplitMode::kAuto};
@@ -308,8 +379,61 @@ class LearnerConfiguration : public Learner {
   LearnerModelParamLegacy mparam_;
   LearnerModelParam learner_model_param_;
   LearnerTrainParam tparam_;
+  // Initial prediction.
   std::vector<std::string> metric_names_;

+  /**
+   * \brief Calculate the `base_score` based on input data.
+   *
+   * \param p_fmat The training DMatrix used to estimate the base score.
+   */
+  void InitBaseScore(DMatrix const* p_fmat) {
+    // Before 1.0.0, we save `base_score` into binary as a transformed value by objective.
+    // After 1.0.0 we save the value provided by user and keep it immutable instead. To
+    // keep the stability, we initialize it in binary LoadModel instead of configuration.
+    // Under what condition should we omit the transformation:
+    //
+    // - base_score is loaded from old binary model.
+    //
+    // What are the other possible conditions:
+    //
+    // - model loaded from new binary or JSON.
+    // - model is created from scratch.
+    // - model is configured second time due to change of parameter
+    CHECK(obj_);
+    if (!mparam_.base_score_estimated) {
+      if (p_fmat) {
+        // We estimate it from input data.
+        linalg::Tensor<float, 1> base_score;
+        obj_->InitEstimation(p_fmat->Info(), &base_score);
+        mparam_.base_score = base_score(0);
+        CHECK(!std::isnan(mparam_.base_score));
+      } else {
+        mparam_.base_score = ObjFunction::DefaultBaseScore();
+      }
+      mparam_.base_score_estimated = true;
+      // Update the shared model parameter
+      this->ConfigureModelParam();
+    }
+  }
+
+  // Convert mparam to learner_model_param
+  void ConfigureModelParam() {
+    this->ConfigureTargets();
+
+    CHECK(obj_);
+    auto task = obj_->Task();
+    linalg::Tensor<float, 1> base_score({1}, Ctx()->gpu_id);
+    auto h_base_score = base_score.HostView();
+
+    // transform to margin
+    h_base_score(0) = obj_->ProbToMargin(mparam_.base_score);
+    // move it to model param, which is shared with all other components.
+    learner_model_param_ = LearnerModelParam(Ctx(), mparam_, std::move(base_score), task);
+    CHECK(learner_model_param_.Initialized());
+    CHECK_NE(learner_model_param_.BaseScore(Ctx()).Size(), 0);
+  }
+
  public:
   explicit LearnerConfiguration(std::vector<std::shared_ptr<DMatrix> > cache)
       : need_configuration_{true} {
@@ -329,22 +453,24 @@ class LearnerConfiguration : public Learner {
   // Configuration before data is known.
   void Configure() override {
     // Varient of double checked lock
-    if (!this->need_configuration_) { return; }
+    if (!this->need_configuration_) {
+      return;
+    }
     std::lock_guard<std::mutex> guard(config_lock_);
-    if (!this->need_configuration_) { return; }
+    if (!this->need_configuration_) {
+      return;
+    }

     monitor_.Start("Configure");
     auto old_tparam = tparam_;
     Args args = {cfg_.cbegin(), cfg_.cend()};

     tparam_.UpdateAllowUnknown(args);
-    auto mparam_backup = mparam_;
     mparam_.UpdateAllowUnknown(args);
-    auto initialized = generic_parameters_.GetInitialised();
-    auto old_seed = generic_parameters_.seed;
-    generic_parameters_.UpdateAllowUnknown(args);
+
+    auto initialized = ctx_.GetInitialised();
+    auto old_seed = ctx_.seed;
+    ctx_.UpdateAllowUnknown(args);

     ConsoleLogger::Configure(args);
@@ -355,8 +481,8 @@ class LearnerConfiguration : public Learner {
     }

     // set seed only before the model is initialized
-    if (!initialized || generic_parameters_.seed != old_seed) {
-      common::GlobalRandom().seed(generic_parameters_.seed);
+    if (!initialized || ctx_.seed != old_seed) {
+      common::GlobalRandom().seed(ctx_.seed);
     }

     // must precede configure gbm since num_features is required for gbm
@@ -364,31 +490,15 @@ class LearnerConfiguration : public Learner {
     args = {cfg_.cbegin(), cfg_.cend()};  // renew
     this->ConfigureObjective(old_tparam, &args);

-    auto task = this->ConfigureTargets();
-
-    // Before 1.0.0, we save `base_score` into binary as a transformed value by objective.
-    // After 1.0.0 we save the value provided by user and keep it immutable instead. To
-    // keep the stability, we initialize it in binary LoadModel instead of configuration.
-    // Under what condition should we omit the transformation:
-    //
-    // - base_score is loaded from old binary model.
-    //
-    // What are the other possible conditions:
-    //
-    // - model loaded from new binary or JSON.
-    // - model is created from scratch.
-    // - model is configured second time due to change of parameter
-    if (!learner_model_param_.Initialized() || mparam_.base_score != mparam_backup.base_score) {
-      learner_model_param_ =
-          LearnerModelParam(mparam_, obj_->ProbToMargin(mparam_.base_score), task);
-    }
-
+    learner_model_param_.task = obj_->Task();  // required by gbm configuration.
     this->ConfigureGBM(old_tparam, args);
-    generic_parameters_.ConfigureGpuId(this->gbm_->UseGPU());
+    ctx_.ConfigureGpuId(this->gbm_->UseGPU());
+    this->ConfigureModelParam();

     this->ConfigureMetrics(args);

     this->need_configuration_ = false;
-    if (generic_parameters_.validate_parameters) {
+    if (ctx_.validate_parameters) {
       this->ValidateParameters();
     }
@@ -396,6 +506,11 @@ class LearnerConfiguration : public Learner {
     monitor_.Stop("Configure");
   }

+  void CheckModelInitialized() const {
+    CHECK(learner_model_param_.Initialized()) << "Model not yet initialized.";
+    CHECK_NE(learner_model_param_.BaseScore(this->Ctx()).Size(), 0);
+  }
+
   virtual PredictionContainer* GetPredictionCache() const {
     return &((*ThreadLocalPredictionCache::Get())[this]);
   }
@@ -417,7 +532,7 @@ class LearnerConfiguration : public Learner {

     auto const& objective_fn = learner_parameters.at("objective");
     if (!obj_) {
-      obj_.reset(ObjFunction::Create(tparam_.objective, &generic_parameters_));
+      obj_.reset(ObjFunction::Create(tparam_.objective, &ctx_));
     }
     obj_->LoadConfig(objective_fn);
     learner_model_param_.task = obj_->Task();
@@ -425,7 +540,7 @@ class LearnerConfiguration : public Learner {
     tparam_.booster = get<String>(gradient_booster["name"]);
     if (!gbm_) {
       gbm_.reset(GradientBooster::Create(tparam_.booster,
-                                         &generic_parameters_, &learner_model_param_));
+                                         &ctx_, &learner_model_param_));
     }
     gbm_->LoadConfig(gradient_booster);
@@ -441,15 +556,15 @@ class LearnerConfiguration : public Learner {
       } else {
         metric_names_[i] = get<String>(j_metrics[i]["name"]);
       }
-      metrics_[i] = std::unique_ptr<Metric>(Metric::Create(metric_names_[i], &generic_parameters_));
+      metrics_[i] = std::unique_ptr<Metric>(Metric::Create(metric_names_[i], &ctx_));
       if (!old_serialization) {
         metrics_[i]->LoadConfig(j_metrics[i]);
       }
     }

-    FromJson(learner_parameters.at("generic_param"), &generic_parameters_);
+    FromJson(learner_parameters.at("generic_param"), &ctx_);
     // make sure the GPU ID is valid in new environment before start running configure.
-    generic_parameters_.ConfigureGpuId(false);
+    ctx_.ConfigureGpuId(false);

     this->need_configuration_ = true;
   }
@@ -478,7 +593,7 @@ class LearnerConfiguration : public Learner {
     }
     learner_parameters["metrics"] = Array(std::move(metrics));

-    learner_parameters["generic_param"] = ToJson(generic_parameters_);
+    learner_parameters["generic_param"] = ToJson(ctx_);
   }

   void SetParam(const std::string& key, const std::string& value) override {
@@ -551,7 +666,7 @@ class LearnerConfiguration : public Learner {
     return cfg_;
   }

-  GenericParameter const* Ctx() const override { return &generic_parameters_; }
+  Context const* Ctx() const override { return &ctx_; }

  private:
   void ValidateParameters() {
@@ -654,7 +769,7 @@ class LearnerConfiguration : public Learner {

   void ConfigureGBM(LearnerTrainParam const& old, Args const& args) {
     if (gbm_ == nullptr || old.booster != tparam_.booster) {
-      gbm_.reset(GradientBooster::Create(tparam_.booster, &generic_parameters_,
+      gbm_.reset(GradientBooster::Create(tparam_.booster, &ctx_,
                                          &learner_model_param_));
     }
     gbm_->Configure(args);
@@ -678,7 +793,7 @@ class LearnerConfiguration : public Learner {
       cfg_["max_delta_step"] = kMaxDeltaStepDefaultValue;
     }
     if (obj_ == nullptr || tparam_.objective != old.objective) {
-      obj_.reset(ObjFunction::Create(tparam_.objective, &generic_parameters_));
+      obj_.reset(ObjFunction::Create(tparam_.objective, &ctx_));
     }
     auto& args = *p_args;
     args = {cfg_.cbegin(), cfg_.cend()};  // renew
@@ -691,7 +806,7 @@ class LearnerConfiguration : public Learner {
       return m->Name() != name;
     };
     if (std::all_of(metrics_.begin(), metrics_.end(), DupCheck)) {
-      metrics_.emplace_back(std::unique_ptr<Metric>(Metric::Create(name, &generic_parameters_)));
+      metrics_.emplace_back(std::unique_ptr<Metric>(Metric::Create(name, &ctx_)));
       mparam_.contain_eval_metrics = 1;
     }
   }
@@ -703,7 +818,7 @@ class LearnerConfiguration : public Learner {
   /**
    * Get number of targets from objective function.
    */
-  ObjInfo ConfigureTargets() {
+  void ConfigureTargets() {
     CHECK(this->obj_);
     auto const& cache = this->GetPredictionCache()->Container();
     size_t n_targets = 1;
@@ -722,7 +837,6 @@ class LearnerConfiguration : public Learner {
     } else {
       mparam_.num_target = n_targets;
     }
-    return this->obj_->Task();
   }
 };

@@ -754,14 +868,14 @@ class LearnerIO : public LearnerConfiguration {

     std::string name = get<String>(objective_fn["name"]);
     tparam_.UpdateAllowUnknown(Args{{"objective", name}});
-    obj_.reset(ObjFunction::Create(name, &generic_parameters_));
+    obj_.reset(ObjFunction::Create(name, &ctx_));
     obj_->LoadConfig(objective_fn);

     auto const& gradient_booster = learner.at("gradient_booster");
     name = get<String>(gradient_booster["name"]);
     tparam_.UpdateAllowUnknown(Args{{"booster", name}});
     gbm_.reset(
-        GradientBooster::Create(tparam_.booster, &generic_parameters_, &learner_model_param_));
+        GradientBooster::Create(tparam_.booster, &ctx_, &learner_model_param_));
     gbm_->LoadModel(gradient_booster);

     auto const& j_attributes = get<Object const>(learner.at("attributes"));
@@ -791,6 +905,7 @@ class LearnerIO : public LearnerConfiguration {

   void SaveModel(Json* p_out) const override {
     CHECK(!this->need_configuration_) << "Call Configure before saving model.";
+    this->CheckModelInitialized();

     Version::Save(p_out);
     Json& out { *p_out };
@@ -826,7 +941,7 @@ class LearnerIO : public LearnerConfiguration {

   // About to be deprecated by JSON format
   void LoadModel(dmlc::Stream* fi) override {
-    generic_parameters_.UpdateAllowUnknown(Args{});
+    ctx_.UpdateAllowUnknown(Args{});
     tparam_.Init(std::vector<std::pair<std::string, std::string>>{});
     // TODO(tqchen) mark deprecation of old format.
     common::PeekableInStream fp(fi);
@@ -881,8 +996,8 @@ class LearnerIO : public LearnerConfiguration {
     CHECK(fi->Read(&tparam_.objective)) << "BoostLearner: wrong model format";
     CHECK(fi->Read(&tparam_.booster)) << "BoostLearner: wrong model format";

-    obj_.reset(ObjFunction::Create(tparam_.objective, &generic_parameters_));
-    gbm_.reset(GradientBooster::Create(tparam_.booster, &generic_parameters_,
+    obj_.reset(ObjFunction::Create(tparam_.objective, &ctx_));
+    gbm_.reset(GradientBooster::Create(tparam_.booster, &ctx_,
                                        &learner_model_param_));
     gbm_->Load(fi);
     if (mparam_.contain_extra_attrs != 0) {
@@ -925,7 +1040,14 @@ class LearnerIO : public LearnerConfiguration {
     }

     learner_model_param_ =
-        LearnerModelParam(mparam_, obj_->ProbToMargin(mparam_.base_score), obj_->Task());
+        LearnerModelParam(&ctx_, mparam_,
+                          linalg::Tensor<float, 1>{{std::isnan(mparam_.base_score)
+                                                        ? std::numeric_limits<float>::quiet_NaN()
+                                                        : obj_->ProbToMargin(mparam_.base_score)},
+                                                   {1},
+                                                   Context::kCpuId},
+                          obj_->Task());

     if (attributes_.find("objective") != attributes_.cend()) {
       auto obj_str = attributes_.at("objective");
       auto j_obj = Json::Load({obj_str.c_str(), obj_str.size()});
@@ -969,6 +1091,8 @@ class LearnerIO : public LearnerConfiguration {
   // Save model into binary format. The code is about to be deprecated by more robust
   // JSON serialization format.
   void SaveModel(dmlc::Stream* fo) const override {
+    this->CheckModelInitialized();
+
     LearnerModelParamLegacy mparam = mparam_;  // make a copy to potentially modify
     std::vector<std::pair<std::string, std::string> > extra_attr;
     mparam.contain_extra_attrs = 1;
@@ -1000,6 +1124,7 @@ class LearnerIO : public LearnerConfiguration {
       }
       extra_attr.emplace_back("metrics", os.str());
     }
+
     std::string header {"binf"};
     fo->Write(header.data(), 4);
     if (DMLC_IO_NO_ENDIAN_SWAP) {
@@ -1022,6 +1147,8 @@ class LearnerIO : public LearnerConfiguration {
   }

   void Save(dmlc::Stream* fo) const override {
+    this->CheckModelInitialized();
+
     Json memory_snapshot{Object()};
     memory_snapshot["Model"] = Object();
     auto& model = memory_snapshot["Model"];
@@ -1108,28 +1235,30 @@ class LearnerImpl : public LearnerIO {
       }
     }

-  std::vector<std::string> DumpModel(const FeatureMap& fmap,
-                                     bool with_stats,
+  std::vector<std::string> DumpModel(const FeatureMap& fmap, bool with_stats,
                                      std::string format) override {
     this->Configure();
+    this->CheckModelInitialized();
+
     return gbm_->DumpModel(fmap, with_stats, format);
   }

-  Learner *Slice(int32_t begin_layer, int32_t end_layer, int32_t step,
-                 bool *out_of_bound) override {
+  Learner* Slice(int32_t begin_layer, int32_t end_layer, int32_t step,
+                 bool* out_of_bound) override {
     this->Configure();
+    this->CheckModelInitialized();
+
     CHECK_NE(this->learner_model_param_.num_feature, 0);
     CHECK_GE(begin_layer, 0);
-    auto *out_impl = new LearnerImpl({});
-    out_impl->learner_model_param_ = this->learner_model_param_;
-    out_impl->generic_parameters_ = this->generic_parameters_;
+    auto* out_impl = new LearnerImpl({});
+    out_impl->learner_model_param_.Copy(this->learner_model_param_);
+    out_impl->ctx_ = this->ctx_;
     auto gbm = std::unique_ptr<GradientBooster>(GradientBooster::Create(
-        this->tparam_.booster, &out_impl->generic_parameters_,
-        &out_impl->learner_model_param_));
+        this->tparam_.booster, &out_impl->ctx_, &out_impl->learner_model_param_));
     this->gbm_->Slice(begin_layer, end_layer, step, gbm.get(), out_of_bound);
     out_impl->gbm_ = std::move(gbm);

-    Json config { Object() };
+    Json config{Object()};
     this->SaveConfig(&config);
     out_impl->mparam_ = this->mparam_;
     out_impl->attributes_ = this->attributes_;
@@ -1156,15 +1285,17 @@ class LearnerImpl : public LearnerIO {
     monitor_.Start("UpdateOneIter");
     TrainingObserver::Instance().Update(iter);
     this->Configure();
-    if (generic_parameters_.seed_per_iteration) {
-      common::GlobalRandom().seed(generic_parameters_.seed * kRandSeedMagic + iter);
+    this->InitBaseScore(train.get());
+
+    if (ctx_.seed_per_iteration) {
+      common::GlobalRandom().seed(ctx_.seed * kRandSeedMagic + iter);
     }

     this->CheckDataSplitMode();
     this->ValidateDMatrix(train.get(), true);

     auto local_cache = this->GetPredictionCache();
-    auto& predt = local_cache->Cache(train, generic_parameters_.gpu_id);
+    auto& predt = local_cache->Cache(train, ctx_.gpu_id);

     monitor_.Start("PredictRaw");
     this->PredictRaw(train.get(), &predt, true, 0, 0);
@@ -1184,14 +1315,18 @@ class LearnerImpl : public LearnerIO {
                     HostDeviceVector<GradientPair>* in_gpair) override {
     monitor_.Start("BoostOneIter");
     this->Configure();
-    if (generic_parameters_.seed_per_iteration) {
-      common::GlobalRandom().seed(generic_parameters_.seed * kRandSeedMagic + iter);
+    // Should have been set to default in the first prediction.
+    CHECK(mparam_.base_score_estimated);
+
+    if (ctx_.seed_per_iteration) {
+      common::GlobalRandom().seed(ctx_.seed * kRandSeedMagic + iter);
     }

     this->CheckDataSplitMode();
     this->ValidateDMatrix(train.get(), true);

     auto local_cache = this->GetPredictionCache();
-    local_cache->Cache(train, generic_parameters_.gpu_id);
+    local_cache->Cache(train, ctx_.gpu_id);

     gbm_->DoBoost(train.get(), in_gpair, &local_cache->Entry(train.get()), obj_.get());
     monitor_.Stop("BoostOneIter");
@@ -1202,23 +1337,24 @@ class LearnerImpl : public LearnerIO {
                        const std::vector<std::string>& data_names) override {
     monitor_.Start("EvalOneIter");
     this->Configure();
+    this->CheckModelInitialized();

     std::ostringstream os;
     os.precision(std::numeric_limits<double>::max_digits10);
     os << '[' << iter << ']' << std::setiosflags(std::ios::fixed);
     if (metrics_.size() == 0 && tparam_.disable_default_eval_metric <= 0) {
-      metrics_.emplace_back(Metric::Create(obj_->DefaultEvalMetric(), &generic_parameters_));
+      metrics_.emplace_back(Metric::Create(obj_->DefaultEvalMetric(), &ctx_));
       metrics_.back()->Configure({cfg_.begin(), cfg_.end()});
     }

     auto local_cache = this->GetPredictionCache();
     for (size_t i = 0; i < data_sets.size(); ++i) {
       std::shared_ptr<DMatrix> m = data_sets[i];
-      auto &predt = local_cache->Cache(m, generic_parameters_.gpu_id);
+      auto &predt = local_cache->Cache(m, ctx_.gpu_id);
       this->ValidateDMatrix(m.get(), false);
       this->PredictRaw(m.get(), &predt, false, 0, 0);

-      auto &out = output_predictions_.Cache(m, generic_parameters_.gpu_id).predictions;
+      auto &out = output_predictions_.Cache(m, ctx_.gpu_id).predictions;
       out.Resize(predt.predictions.Size());
       out.Copy(predt.predictions);
@@ -1241,6 +1377,9 @@ class LearnerImpl : public LearnerIO {
                                   static_cast<int>(pred_interactions) +
                                   static_cast<int>(pred_contribs);
     this->Configure();
+    this->InitBaseScore(nullptr);
+    this->CheckModelInitialized();
+
     CHECK_LE(multiple_predictions, 1) << "Perform one kind of prediction at a time.";
     if (pred_contribs) {
       gbm_->PredictContribution(data.get(), out_preds, layer_begin, layer_end, approx_contribs);
@@ -1251,10 +1390,10 @@ class LearnerImpl : public LearnerIO {
       gbm_->PredictLeaf(data.get(), out_preds, layer_begin, layer_end);
     } else {
       auto local_cache = this->GetPredictionCache();
-      auto& prediction = local_cache->Cache(data, generic_parameters_.gpu_id);
+      auto& prediction = local_cache->Cache(data, ctx_.gpu_id);
       this->PredictRaw(data.get(), &prediction, training, layer_begin, layer_end);
       // Copy the prediction cache to output prediction. out_preds comes from C API
-      out_preds->SetDevice(generic_parameters_.gpu_id);
+      out_preds->SetDevice(ctx_.gpu_id);
       out_preds->Resize(prediction.predictions.Size());
       out_preds->Copy(prediction.predictions);
       if (!output_margin) {
@@ -1268,8 +1407,10 @@ class LearnerImpl : public LearnerIO {
     CHECK(!this->need_configuration_);
     return this->gbm_->BoostedRounds();
   }
+
   uint32_t Groups() const override {
     CHECK(!this->need_configuration_);
+    this->CheckModelInitialized();
     return this->learner_model_param_.num_output_group;
   }
@@ -1281,6 +1422,9 @@ class LearnerImpl : public LearnerIO {
                       HostDeviceVector<bst_float>** out_preds, uint32_t iteration_begin,
                       uint32_t iteration_end) override {
     this->Configure();
+    this->InitBaseScore(nullptr);
+    this->CheckModelInitialized();
+
     auto& out_predictions = this->GetThreadLocal().prediction_entry;
     this->gbm_->InplacePredict(p_m, missing, &out_predictions, iteration_begin, iteration_end);
     if (type == PredictionType::kValue) {
@@ -1296,6 +1440,8 @@ class LearnerImpl : public LearnerIO {
   void CalcFeatureScore(std::string const& importance_type, common::Span<int32_t const> trees,
                         std::vector<bst_feature_t>* features, std::vector<float>* scores) override {
     this->Configure();
+    this->CheckModelInitialized();
+
     gbm_->FeatureScore(importance_type, trees, features, scores);
   }
@@ -1315,17 +1461,17 @@ class LearnerImpl : public LearnerIO {
   void PredictRaw(DMatrix *data, PredictionCacheEntry *out_preds, bool training,
                   unsigned layer_begin, unsigned layer_end) const {
     CHECK(gbm_ != nullptr) << "Predict must happen after Load or configuration";
+    this->CheckModelInitialized();
     this->ValidateDMatrix(data, false);
     gbm_->PredictBatch(data, out_preds, training, layer_begin, layer_end);
   }

   void ValidateDMatrix(DMatrix* p_fmat, bool is_training) const {
     MetaInfo const& info = p_fmat->Info();
-    info.Validate(generic_parameters_.gpu_id);
+    info.Validate(ctx_.gpu_id);

     auto const row_based_split = [this]() {
-      return tparam_.dsplit == DataSplitMode::kRow ||
-             tparam_.dsplit == DataSplitMode::kAuto;
+      return tparam_.dsplit == DataSplitMode::kRow || tparam_.dsplit == DataSplitMode::kAuto;
     };
     if (row_based_split()) {
       if (is_training) {