Fix loading old logit model, helper for converting old pickle. (#5281)
* Fix loading old logit model. * Add a helper script for converting old pickle file. * Add version as a model parameter. * Remove the size check in R test to relax the size constraint. * Add missing R doc for passing linting. Run devtools. * Cleanup old model IO logic. * Test compatibility on CI. * Make the argument as required.
This commit is contained in:
parent
5ca21f252a
commit
213f4fa45a
@ -49,7 +49,7 @@ option(USE_SANITIZER "Use santizer flags" OFF)
|
|||||||
option(SANITIZER_PATH "Path to sanitizes.")
|
option(SANITIZER_PATH "Path to sanitizes.")
|
||||||
set(ENABLED_SANITIZERS "address" "leak" CACHE STRING
|
set(ENABLED_SANITIZERS "address" "leak" CACHE STRING
|
||||||
"Semicolon separated list of sanitizer names. E.g 'address;leak'. Supported sanitizers are
|
"Semicolon separated list of sanitizer names. E.g 'address;leak'. Supported sanitizers are
|
||||||
address, leak and thread.")
|
address, leak, undefined and thread.")
|
||||||
## Plugins
|
## Plugins
|
||||||
option(PLUGIN_LZ4 "Build lz4 plugin" OFF)
|
option(PLUGIN_LZ4 "Build lz4 plugin" OFF)
|
||||||
option(PLUGIN_DENSE_PARSER "Build dense parser plugin" OFF)
|
option(PLUGIN_DENSE_PARSER "Build dense parser plugin" OFF)
|
||||||
|
|||||||
@ -139,6 +139,8 @@ xgb.Booster.complete <- function(object, saveraw = TRUE) {
|
|||||||
#' @param reshape whether to reshape the vector of predictions to a matrix form when there are several
|
#' @param reshape whether to reshape the vector of predictions to a matrix form when there are several
|
||||||
#' prediction outputs per case. This option has no effect when either of predleaf, predcontrib,
|
#' prediction outputs per case. This option has no effect when either of predleaf, predcontrib,
|
||||||
#' or predinteraction flags is TRUE.
|
#' or predinteraction flags is TRUE.
|
||||||
|
#' @param training whether the prediction result is used for training. For dart booster,
|
||||||
|
#' training predicting will perform dropout.
|
||||||
#' @param ... Parameters passed to \code{predict.xgb.Booster}
|
#' @param ... Parameters passed to \code{predict.xgb.Booster}
|
||||||
#'
|
#'
|
||||||
#' @details
|
#' @details
|
||||||
|
|||||||
@ -49,6 +49,9 @@ It will use all the trees by default (\code{NULL} value).}
|
|||||||
prediction outputs per case. This option has no effect when either of predleaf, predcontrib,
|
prediction outputs per case. This option has no effect when either of predleaf, predcontrib,
|
||||||
or predinteraction flags is TRUE.}
|
or predinteraction flags is TRUE.}
|
||||||
|
|
||||||
|
\item{training}{whether the prediction result is used for training. For dart booster,
|
||||||
|
training predicting will perform dropout.}
|
||||||
|
|
||||||
\item{...}{Parameters passed to \code{predict.xgb.Booster}}
|
\item{...}{Parameters passed to \code{predict.xgb.Booster}}
|
||||||
}
|
}
|
||||||
\value{
|
\value{
|
||||||
|
|||||||
@ -31,7 +31,6 @@ num_round <- 2
|
|||||||
test_that("custom objective works", {
|
test_that("custom objective works", {
|
||||||
bst <- xgb.train(param, dtrain, num_round, watchlist)
|
bst <- xgb.train(param, dtrain, num_round, watchlist)
|
||||||
expect_equal(class(bst), "xgb.Booster")
|
expect_equal(class(bst), "xgb.Booster")
|
||||||
expect_equal(length(bst$raw), 1100)
|
|
||||||
expect_false(is.null(bst$evaluation_log))
|
expect_false(is.null(bst$evaluation_log))
|
||||||
expect_false(is.null(bst$evaluation_log$eval_error))
|
expect_false(is.null(bst$evaluation_log$eval_error))
|
||||||
expect_lt(bst$evaluation_log[num_round, eval_error], 0.03)
|
expect_lt(bst$evaluation_log[num_round, eval_error], 0.03)
|
||||||
@ -58,5 +57,4 @@ test_that("custom objective using DMatrix attr works", {
|
|||||||
param$objective = logregobjattr
|
param$objective = logregobjattr
|
||||||
bst <- xgb.train(param, dtrain, num_round, watchlist)
|
bst <- xgb.train(param, dtrain, num_round, watchlist)
|
||||||
expect_equal(class(bst), "xgb.Booster")
|
expect_equal(class(bst), "xgb.Booster")
|
||||||
expect_equal(length(bst$raw), 1100)
|
|
||||||
})
|
})
|
||||||
|
|||||||
79
doc/python/convert_090to100.py
Normal file
79
doc/python/convert_090to100.py
Normal file
@ -0,0 +1,79 @@
|
|||||||
|
'''This is a simple script that converts a pickled XGBoost
|
||||||
|
Scikit-Learn interface object from 0.90 to a native model. Pickle
|
||||||
|
format is not stable as it's a direct serialization of Python object.
|
||||||
|
We advise not to use it when stability is needed.
|
||||||
|
|
||||||
|
'''
|
||||||
|
import pickle
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import argparse
|
||||||
|
import numpy as np
|
||||||
|
import xgboost
|
||||||
|
import warnings
|
||||||
|
|
||||||
|
|
||||||
|
def save_label_encoder(le):
    '''Save the label encoder in XGBClassifier.

    Returns a JSON-friendly dict built from the encoder's ``__dict__``:
    numpy arrays are converted to plain Python lists, every other
    attribute is passed through unchanged.
    '''
    return {
        key: value.tolist() if isinstance(value, np.ndarray) else value
        for key, value in le.__dict__.items()
    }
|
||||||
|
|
||||||
|
|
||||||
|
def xgboost_skl_90to100(skl_model):
    '''Extract the model and related metadata in SKL model.

    Parameters
    ----------
    skl_model : str
        Path to a pickle file holding an XGBoost 0.90 Scikit-Learn
        interface object (an ``xgboost.XGBModel`` subclass).

    Side effects
    ------------
    Writes a native model file named
    ``xgboost_native_model_from_<skl_model>-<i>.bin`` in the current
    working directory, where ``i`` is the first index that does not
    collide with an existing file.
    '''
    model = {}
    with open(skl_model, 'rb') as fd:
        old = pickle.load(fd)
    if not isinstance(old, xgboost.XGBModel):
        # Fixed typo in the user-facing message: "handes" -> "handles".
        raise TypeError(
            'The script only handles Scikit-Learn interface object')

    # Save Scikit-Learn specific Python attributes into a JSON document.
    for k, v in old.__dict__.items():
        if k == '_le':
            model[k] = save_label_encoder(v)
        elif k == 'classes_':
            model[k] = v.tolist()
        elif k == '_Booster':
            # The booster itself is exported separately via `save_model`.
            continue
        else:
            try:
                # Keep the attribute only if it round-trips through JSON.
                json.dumps({k: v})
                model[k] = v
            except TypeError:
                warnings.warn(str(k) + ' is not saved in Scikit-Learn meta.')
    booster = old.get_booster()
    # Store the JSON serialization as an attribute
    booster.set_attr(scikit_learn=json.dumps(model))

    # Save it into a native model, picking the first non-colliding name.
    i = 0
    while True:
        path = 'xgboost_native_model_from_' + skl_model + '-' + str(i) + '.bin'
        if os.path.exists(path):
            i += 1
            continue
        booster.save_model(path)
        break
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # The pickle must be loaded by the same (old) XGBoost version that
    # produced it, so refuse to run under the 1.0.0 code base.
    assert xgboost.__version__ != '1.0.0', ('Please use the XGBoost version'
                                            ' that generates this pickle.')

    cli = argparse.ArgumentParser(
        description=('A simple script to convert pickle generated by'
                     ' XGBoost 0.90 to XGBoost 1.0.0 model (not pickle).'))
    cli.add_argument(
        '--old-pickle',
        type=str,
        required=True,
        help='Path to old pickle file of Scikit-Learn interface object. '
        'Will output a native model converted from this pickle file')
    arguments = cli.parse_args()

    xgboost_skl_90to100(arguments.old_pickle)
|
||||||
@ -91,7 +91,12 @@ Loading pickled file from different version of XGBoost
|
|||||||
|
|
||||||
As noted, pickled model is neither portable nor stable, but in some cases the pickled
|
As noted, pickled model is neither portable nor stable, but in some cases the pickled
|
||||||
models are valuable. One way to restore it in the future is to load it back with that
|
models are valuable. One way to restore it in the future is to load it back with that
|
||||||
specific version of Python and XGBoost, export the model by calling `save_model`.
|
specific version of Python and XGBoost, export the model by calling `save_model`. To help
|
||||||
|
ease the migration, we created a simple script for converting pickled XGBoost 0.90
|
||||||
|
Scikit-Learn interface object to XGBoost 1.0.0 native model. Please note that the script
|
||||||
|
suits simple use cases, and it's advised not to use pickle when stability is needed.
|
||||||
|
It's located in ``xgboost/doc/python`` with the name ``convert_090to100.py``. See
|
||||||
|
comments in the script for more details.
|
||||||
|
|
||||||
********************************************************
|
********************************************************
|
||||||
Saving and Loading the internal parameters configuration
|
Saving and Loading the internal parameters configuration
|
||||||
|
|||||||
@ -208,6 +208,8 @@ struct LearnerModelParam {
|
|||||||
// As the old `LearnerModelParamLegacy` is still used by binary IO, we keep
|
// As the old `LearnerModelParamLegacy` is still used by binary IO, we keep
|
||||||
// this one as an immutable copy.
|
// this one as an immutable copy.
|
||||||
LearnerModelParam(LearnerModelParamLegacy const& user_param, float base_margin);
|
LearnerModelParam(LearnerModelParamLegacy const& user_param, float base_margin);
|
||||||
|
/* \brief Whether this parameter is initialized with LearnerModelParamLegacy. */
|
||||||
|
bool Initialized() const { return num_feature != 0; }
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace xgboost
|
} // namespace xgboost
|
||||||
|
|||||||
@ -600,6 +600,7 @@ class DaskXGBRegressor(DaskScikitLearnBase):
|
|||||||
results = train(self.client, params, dtrain,
|
results = train(self.client, params, dtrain,
|
||||||
num_boost_round=self.get_num_boosting_rounds(),
|
num_boost_round=self.get_num_boosting_rounds(),
|
||||||
evals=evals)
|
evals=evals)
|
||||||
|
# pylint: disable=attribute-defined-outside-init
|
||||||
self._Booster = results['booster']
|
self._Booster = results['booster']
|
||||||
# pylint: disable=attribute-defined-outside-init
|
# pylint: disable=attribute-defined-outside-init
|
||||||
self.evals_result_ = results['history']
|
self.evals_result_ = results['history']
|
||||||
|
|||||||
195
src/learner.cc
195
src/learner.cc
@ -1,5 +1,5 @@
|
|||||||
/*!
|
/*!
|
||||||
* Copyright 2014-2019 by Contributors
|
* Copyright 2014-2020 by Contributors
|
||||||
* \file learner.cc
|
* \file learner.cc
|
||||||
* \brief Implementation of learning algorithm.
|
* \brief Implementation of learning algorithm.
|
||||||
* \author Tianqi Chen
|
* \author Tianqi Chen
|
||||||
@ -67,19 +67,26 @@ struct LearnerModelParamLegacy : public dmlc::Parameter<LearnerModelParamLegacy>
|
|||||||
/* \brief global bias */
|
/* \brief global bias */
|
||||||
bst_float base_score;
|
bst_float base_score;
|
||||||
/* \brief number of features */
|
/* \brief number of features */
|
||||||
unsigned num_feature;
|
uint32_t num_feature;
|
||||||
/* \brief number of classes, if it is multi-class classification */
|
/* \brief number of classes, if it is multi-class classification */
|
||||||
int num_class;
|
int32_t num_class;
|
||||||
/*! \brief Model contain additional properties */
|
/*! \brief Model contain additional properties */
|
||||||
int contain_extra_attrs;
|
int32_t contain_extra_attrs;
|
||||||
/*! \brief Model contain eval metrics */
|
/*! \brief Model contain eval metrics */
|
||||||
int contain_eval_metrics;
|
int32_t contain_eval_metrics;
|
||||||
|
/*! \brief the version of XGBoost. */
|
||||||
|
uint32_t major_version;
|
||||||
|
uint32_t minor_version;
|
||||||
/*! \brief reserved field */
|
/*! \brief reserved field */
|
||||||
int reserved[29];
|
int reserved[27];
|
||||||
/*! \brief constructor */
|
/*! \brief constructor */
|
||||||
LearnerModelParamLegacy() {
|
LearnerModelParamLegacy() {
|
||||||
std::memset(this, 0, sizeof(LearnerModelParamLegacy));
|
std::memset(this, 0, sizeof(LearnerModelParamLegacy));
|
||||||
base_score = 0.5f;
|
base_score = 0.5f;
|
||||||
|
major_version = std::get<0>(Version::Self());
|
||||||
|
minor_version = std::get<1>(Version::Self());
|
||||||
|
static_assert(sizeof(LearnerModelParamLegacy) == 136,
|
||||||
|
"Do not change the size of this struct, as it will break binary IO.");
|
||||||
}
|
}
|
||||||
// Skip other legacy fields.
|
// Skip other legacy fields.
|
||||||
Json ToJson() const {
|
Json ToJson() const {
|
||||||
@ -117,8 +124,9 @@ LearnerModelParam::LearnerModelParam(
|
|||||||
LearnerModelParamLegacy const &user_param, float base_margin)
|
LearnerModelParamLegacy const &user_param, float base_margin)
|
||||||
: base_score{base_margin}, num_feature{user_param.num_feature},
|
: base_score{base_margin}, num_feature{user_param.num_feature},
|
||||||
num_output_group{user_param.num_class == 0
|
num_output_group{user_param.num_class == 0
|
||||||
? 1
|
? 1
|
||||||
: static_cast<uint32_t>(user_param.num_class)} {}
|
: static_cast<uint32_t>(user_param.num_class)}
|
||||||
|
{}
|
||||||
|
|
||||||
struct LearnerTrainParam : public XGBoostParameter<LearnerTrainParam> {
|
struct LearnerTrainParam : public XGBoostParameter<LearnerTrainParam> {
|
||||||
// data split mode, can be row, col, or none.
|
// data split mode, can be row, col, or none.
|
||||||
@ -140,7 +148,7 @@ struct LearnerTrainParam : public XGBoostParameter<LearnerTrainParam> {
|
|||||||
.describe("Data split mode for distributed training.");
|
.describe("Data split mode for distributed training.");
|
||||||
DMLC_DECLARE_FIELD(disable_default_eval_metric)
|
DMLC_DECLARE_FIELD(disable_default_eval_metric)
|
||||||
.set_default(0)
|
.set_default(0)
|
||||||
.describe("flag to disable default metric. Set to >0 to disable");
|
.describe("Flag to disable default metric. Set to >0 to disable");
|
||||||
DMLC_DECLARE_FIELD(booster)
|
DMLC_DECLARE_FIELD(booster)
|
||||||
.set_default("gbtree")
|
.set_default("gbtree")
|
||||||
.describe("Gradient booster used for training.");
|
.describe("Gradient booster used for training.");
|
||||||
@ -200,6 +208,7 @@ class LearnerImpl : public Learner {
|
|||||||
Args args = {cfg_.cbegin(), cfg_.cend()};
|
Args args = {cfg_.cbegin(), cfg_.cend()};
|
||||||
|
|
||||||
tparam_.UpdateAllowUnknown(args);
|
tparam_.UpdateAllowUnknown(args);
|
||||||
|
auto mparam_backup = mparam_;
|
||||||
mparam_.UpdateAllowUnknown(args);
|
mparam_.UpdateAllowUnknown(args);
|
||||||
generic_parameters_.UpdateAllowUnknown(args);
|
generic_parameters_.UpdateAllowUnknown(args);
|
||||||
generic_parameters_.CheckDeprecated();
|
generic_parameters_.CheckDeprecated();
|
||||||
@ -217,17 +226,33 @@ class LearnerImpl : public Learner {
|
|||||||
|
|
||||||
// set seed only before the model is initialized
|
// set seed only before the model is initialized
|
||||||
common::GlobalRandom().seed(generic_parameters_.seed);
|
common::GlobalRandom().seed(generic_parameters_.seed);
|
||||||
|
|
||||||
// must precede configure gbm since num_features is required for gbm
|
// must precede configure gbm since num_features is required for gbm
|
||||||
this->ConfigureNumFeatures();
|
this->ConfigureNumFeatures();
|
||||||
args = {cfg_.cbegin(), cfg_.cend()}; // renew
|
args = {cfg_.cbegin(), cfg_.cend()}; // renew
|
||||||
this->ConfigureObjective(old_tparam, &args);
|
this->ConfigureObjective(old_tparam, &args);
|
||||||
this->ConfigureGBM(old_tparam, args);
|
|
||||||
this->ConfigureMetrics(args);
|
|
||||||
|
|
||||||
|
// Before 1.0.0, we save `base_score` into binary as a transformed value by objective.
|
||||||
|
// After 1.0.0 we save the value provided by user and keep it immutable instead. To
|
||||||
|
// keep the stability, we initialize it in binary LoadModel instead of configuration.
|
||||||
|
// Under what condition should we omit the transformation:
|
||||||
|
//
|
||||||
|
// - base_score is loaded from old binary model.
|
||||||
|
//
|
||||||
|
// What are the other possible conditions:
|
||||||
|
//
|
||||||
|
// - model loaded from new binary or JSON.
|
||||||
|
// - model is created from scratch.
|
||||||
|
// - model is configured second time due to change of parameter
|
||||||
|
if (!learner_model_param_.Initialized() || mparam_.base_score != mparam_backup.base_score) {
|
||||||
|
learner_model_param_ = LearnerModelParam(mparam_,
|
||||||
|
obj_->ProbToMargin(mparam_.base_score));
|
||||||
|
}
|
||||||
|
|
||||||
|
this->ConfigureGBM(old_tparam, args);
|
||||||
generic_parameters_.ConfigureGpuId(this->gbm_->UseGPU());
|
generic_parameters_.ConfigureGpuId(this->gbm_->UseGPU());
|
||||||
|
|
||||||
learner_model_param_ = LearnerModelParam(mparam_,
|
this->ConfigureMetrics(args);
|
||||||
obj_->ProbToMargin(mparam_.base_score));
|
|
||||||
|
|
||||||
this->need_configuration_ = false;
|
this->need_configuration_ = false;
|
||||||
if (generic_parameters_.validate_parameters) {
|
if (generic_parameters_.validate_parameters) {
|
||||||
@ -337,9 +362,6 @@ class LearnerImpl : public Learner {
|
|||||||
cache_));
|
cache_));
|
||||||
gbm_->LoadModel(gradient_booster);
|
gbm_->LoadModel(gradient_booster);
|
||||||
|
|
||||||
learner_model_param_ = LearnerModelParam(mparam_,
|
|
||||||
obj_->ProbToMargin(mparam_.base_score));
|
|
||||||
|
|
||||||
auto const& j_attributes = get<Object const>(learner.at("attributes"));
|
auto const& j_attributes = get<Object const>(learner.at("attributes"));
|
||||||
attributes_.clear();
|
attributes_.clear();
|
||||||
for (auto const& kv : j_attributes) {
|
for (auto const& kv : j_attributes) {
|
||||||
@ -459,6 +481,7 @@ class LearnerImpl : public Learner {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (header[0] == '{') {
|
if (header[0] == '{') {
|
||||||
|
// Dispatch to JSON
|
||||||
auto json_stream = common::FixedSizeStream(&fp);
|
auto json_stream = common::FixedSizeStream(&fp);
|
||||||
std::string buffer;
|
std::string buffer;
|
||||||
json_stream.Take(&buffer);
|
json_stream.Take(&buffer);
|
||||||
@ -471,25 +494,10 @@ class LearnerImpl : public Learner {
|
|||||||
// read parameter
|
// read parameter
|
||||||
CHECK_EQ(fi->Read(&mparam_, sizeof(mparam_)), sizeof(mparam_))
|
CHECK_EQ(fi->Read(&mparam_, sizeof(mparam_)), sizeof(mparam_))
|
||||||
<< "BoostLearner: wrong model format";
|
<< "BoostLearner: wrong model format";
|
||||||
{
|
|
||||||
// backward compatibility code for compatible with old model type
|
CHECK(fi->Read(&tparam_.objective)) << "BoostLearner: wrong model format";
|
||||||
// for new model, Read(&name_obj_) is suffice
|
|
||||||
uint64_t len;
|
|
||||||
CHECK_EQ(fi->Read(&len, sizeof(len)), sizeof(len));
|
|
||||||
if (len >= std::numeric_limits<unsigned>::max()) {
|
|
||||||
int gap;
|
|
||||||
CHECK_EQ(fi->Read(&gap, sizeof(gap)), sizeof(gap))
|
|
||||||
<< "BoostLearner: wrong model format";
|
|
||||||
len = len >> static_cast<uint64_t>(32UL);
|
|
||||||
}
|
|
||||||
if (len != 0) {
|
|
||||||
tparam_.objective.resize(len);
|
|
||||||
CHECK_EQ(fi->Read(&tparam_.objective[0], len), len)
|
|
||||||
<< "BoostLearner: wrong model format";
|
|
||||||
}
|
|
||||||
}
|
|
||||||
CHECK(fi->Read(&tparam_.booster)) << "BoostLearner: wrong model format";
|
CHECK(fi->Read(&tparam_.booster)) << "BoostLearner: wrong model format";
|
||||||
// duplicated code with LazyInitModel
|
|
||||||
obj_.reset(ObjFunction::Create(tparam_.objective, &generic_parameters_));
|
obj_.reset(ObjFunction::Create(tparam_.objective, &generic_parameters_));
|
||||||
gbm_.reset(GradientBooster::Create(tparam_.booster, &generic_parameters_,
|
gbm_.reset(GradientBooster::Create(tparam_.booster, &generic_parameters_,
|
||||||
&learner_model_param_, cache_));
|
&learner_model_param_, cache_));
|
||||||
@ -508,34 +516,57 @@ class LearnerImpl : public Learner {
|
|||||||
}
|
}
|
||||||
attributes_ = std::map<std::string, std::string>(attr.begin(), attr.end());
|
attributes_ = std::map<std::string, std::string>(attr.begin(), attr.end());
|
||||||
}
|
}
|
||||||
if (tparam_.objective == "count:poisson") {
|
bool warn_old_model { false };
|
||||||
std::string max_delta_step;
|
if (attributes_.find("count_poisson_max_delta_step") != attributes_.cend()) {
|
||||||
fi->Read(&max_delta_step);
|
// Loading model from < 1.0.0, objective is not saved.
|
||||||
cfg_["max_delta_step"] = max_delta_step;
|
cfg_["max_delta_step"] = attributes_.at("count_poisson_max_delta_step");
|
||||||
|
attributes_.erase("count_poisson_max_delta_step");
|
||||||
|
warn_old_model = true;
|
||||||
|
} else {
|
||||||
|
warn_old_model = false;
|
||||||
}
|
}
|
||||||
if (mparam_.contain_eval_metrics != 0) {
|
|
||||||
std::vector<std::string> metr;
|
if (mparam_.major_version >= 1) {
|
||||||
fi->Read(&metr);
|
learner_model_param_ = LearnerModelParam(mparam_,
|
||||||
for (auto name : metr) {
|
obj_->ProbToMargin(mparam_.base_score));
|
||||||
metrics_.emplace_back(Metric::Create(name, &generic_parameters_));
|
} else {
|
||||||
|
// Before 1.0.0, base_score is saved as a transformed value, and there's no version
|
||||||
|
// attribute in the saved model.
|
||||||
|
learner_model_param_ = LearnerModelParam(mparam_, mparam_.base_score);
|
||||||
|
warn_old_model = true;
|
||||||
|
}
|
||||||
|
if (attributes_.find("objective") != attributes_.cend()) {
|
||||||
|
auto obj_str = attributes_.at("objective");
|
||||||
|
auto j_obj = Json::Load({obj_str.c_str(), obj_str.size()});
|
||||||
|
obj_->LoadConfig(j_obj);
|
||||||
|
attributes_.erase("objective");
|
||||||
|
} else {
|
||||||
|
warn_old_model = true;
|
||||||
|
}
|
||||||
|
if (attributes_.find("metrics") != attributes_.cend()) {
|
||||||
|
auto metrics_str = attributes_.at("metrics");
|
||||||
|
std::vector<std::string> names { common::Split(metrics_str, ';') };
|
||||||
|
attributes_.erase("metrics");
|
||||||
|
for (auto const& n : names) {
|
||||||
|
this->SetParam(kEvalMetric, n);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (warn_old_model) {
|
||||||
|
LOG(WARNING) << "Loading model from XGBoost < 1.0.0, consider saving it "
|
||||||
|
"again for improved compatibility";
|
||||||
|
}
|
||||||
|
|
||||||
|
// Renew the version.
|
||||||
|
mparam_.major_version = std::get<0>(Version::Self());
|
||||||
|
mparam_.minor_version = std::get<1>(Version::Self());
|
||||||
|
|
||||||
cfg_["num_class"] = common::ToString(mparam_.num_class);
|
cfg_["num_class"] = common::ToString(mparam_.num_class);
|
||||||
cfg_["num_feature"] = common::ToString(mparam_.num_feature);
|
cfg_["num_feature"] = common::ToString(mparam_.num_feature);
|
||||||
|
|
||||||
auto n = tparam_.__DICT__();
|
auto n = tparam_.__DICT__();
|
||||||
cfg_.insert(n.cbegin(), n.cend());
|
cfg_.insert(n.cbegin(), n.cend());
|
||||||
|
|
||||||
Args args = {cfg_.cbegin(), cfg_.cend()};
|
|
||||||
generic_parameters_.UpdateAllowUnknown(args);
|
|
||||||
gbm_->Configure(args);
|
|
||||||
obj_->Configure({cfg_.begin(), cfg_.end()});
|
|
||||||
|
|
||||||
for (auto& p_metric : metrics_) {
|
|
||||||
p_metric->Configure({cfg_.begin(), cfg_.end()});
|
|
||||||
}
|
|
||||||
|
|
||||||
// copy dsplit from config since it will not run again during restore
|
// copy dsplit from config since it will not run again during restore
|
||||||
if (tparam_.dsplit == DataSplitMode::kAuto && rabit::IsDistributed()) {
|
if (tparam_.dsplit == DataSplitMode::kAuto && rabit::IsDistributed()) {
|
||||||
tparam_.dsplit = DataSplitMode::kRow;
|
tparam_.dsplit = DataSplitMode::kRow;
|
||||||
@ -552,15 +583,8 @@ class LearnerImpl : public Learner {
|
|||||||
void SaveModel(dmlc::Stream* fo) const override {
|
void SaveModel(dmlc::Stream* fo) const override {
|
||||||
LearnerModelParamLegacy mparam = mparam_; // make a copy to potentially modify
|
LearnerModelParamLegacy mparam = mparam_; // make a copy to potentially modify
|
||||||
std::vector<std::pair<std::string, std::string> > extra_attr;
|
std::vector<std::pair<std::string, std::string> > extra_attr;
|
||||||
// extra attributed to be added just before saving
|
mparam.contain_extra_attrs = 1;
|
||||||
if (tparam_.objective == "count:poisson") {
|
|
||||||
auto it = cfg_.find("max_delta_step");
|
|
||||||
if (it != cfg_.end()) {
|
|
||||||
// write `max_delta_step` parameter as extra attribute of booster
|
|
||||||
mparam.contain_extra_attrs = 1;
|
|
||||||
extra_attr.emplace_back("count_poisson_max_delta_step", it->second);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
{
|
{
|
||||||
std::vector<std::string> saved_params;
|
std::vector<std::string> saved_params;
|
||||||
// check if rabit_bootstrap_cache were set to non zero before adding to checkpoint
|
// check if rabit_bootstrap_cache were set to non zero before adding to checkpoint
|
||||||
@ -577,6 +601,24 @@ class LearnerImpl : public Learner {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
{
|
||||||
|
// Similar to JSON model IO, we save the objective.
|
||||||
|
Json j_obj { Object() };
|
||||||
|
obj_->SaveConfig(&j_obj);
|
||||||
|
std::string obj_doc;
|
||||||
|
Json::Dump(j_obj, &obj_doc);
|
||||||
|
extra_attr.emplace_back("objective", obj_doc);
|
||||||
|
}
|
||||||
|
// As of 1.0.0, JVM Package and R Package uses Save/Load model for serialization.
|
||||||
|
// Remove this part once they are ported to use actual serialization methods.
|
||||||
|
if (mparam.contain_eval_metrics != 0) {
|
||||||
|
std::stringstream os;
|
||||||
|
for (auto& ev : metrics_) {
|
||||||
|
os << ev->Name() << ";";
|
||||||
|
}
|
||||||
|
extra_attr.emplace_back("metrics", os.str());
|
||||||
|
}
|
||||||
|
|
||||||
fo->Write(&mparam, sizeof(LearnerModelParamLegacy));
|
fo->Write(&mparam, sizeof(LearnerModelParamLegacy));
|
||||||
fo->Write(tparam_.objective);
|
fo->Write(tparam_.objective);
|
||||||
fo->Write(tparam_.booster);
|
fo->Write(tparam_.booster);
|
||||||
@ -587,26 +629,7 @@ class LearnerImpl : public Learner {
|
|||||||
attr[kv.first] = kv.second;
|
attr[kv.first] = kv.second;
|
||||||
}
|
}
|
||||||
fo->Write(std::vector<std::pair<std::string, std::string>>(
|
fo->Write(std::vector<std::pair<std::string, std::string>>(
|
||||||
attr.begin(), attr.end()));
|
attr.begin(), attr.end()));
|
||||||
}
|
|
||||||
if (tparam_.objective == "count:poisson") {
|
|
||||||
auto it = cfg_.find("max_delta_step");
|
|
||||||
if (it != cfg_.end()) {
|
|
||||||
fo->Write(it->second);
|
|
||||||
} else {
|
|
||||||
// recover value of max_delta_step from extra attributes
|
|
||||||
auto it2 = attributes_.find("count_poisson_max_delta_step");
|
|
||||||
const std::string max_delta_step
|
|
||||||
= (it2 != attributes_.end()) ? it2->second : kMaxDeltaStepDefaultValue;
|
|
||||||
fo->Write(max_delta_step);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (mparam.contain_eval_metrics != 0) {
|
|
||||||
std::vector<std::string> metr;
|
|
||||||
for (auto& ev : metrics_) {
|
|
||||||
metr.emplace_back(ev->Name());
|
|
||||||
}
|
|
||||||
fo->Write(metr);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -661,11 +684,13 @@ class LearnerImpl : public Learner {
|
|||||||
|
|
||||||
If you are loading a serialized model (like pickle in Python) generated by older
|
If you are loading a serialized model (like pickle in Python) generated by older
|
||||||
XGBoost, please export the model by calling `Booster.save_model` from that version
|
XGBoost, please export the model by calling `Booster.save_model` from that version
|
||||||
first, then load it back in current version. See:
|
first, then load it back in current version. There's a simple script for helping
|
||||||
|
the process. See:
|
||||||
|
|
||||||
https://xgboost.readthedocs.io/en/latest/tutorials/saving_model.html
|
https://xgboost.readthedocs.io/en/latest/tutorials/saving_model.html
|
||||||
|
|
||||||
for more details about differences between saving model and serializing.
|
for reference to the script, and more details about differences between saving model and
|
||||||
|
serializing.
|
||||||
|
|
||||||
)doc";
|
)doc";
|
||||||
int64_t sz {-1};
|
int64_t sz {-1};
|
||||||
@ -854,7 +879,8 @@ class LearnerImpl : public Learner {
|
|||||||
|
|
||||||
void ConfigureObjective(LearnerTrainParam const& old, Args* p_args) {
|
void ConfigureObjective(LearnerTrainParam const& old, Args* p_args) {
|
||||||
// Once binary IO is gone, NONE of these config is useful.
|
// Once binary IO is gone, NONE of these config is useful.
|
||||||
if (cfg_.find("num_class") != cfg_.cend() && cfg_.at("num_class") != "0") {
|
if (cfg_.find("num_class") != cfg_.cend() && cfg_.at("num_class") != "0" &&
|
||||||
|
tparam_.objective != "multi:softprob") {
|
||||||
cfg_["num_output_group"] = cfg_["num_class"];
|
cfg_["num_output_group"] = cfg_["num_class"];
|
||||||
if (atoi(cfg_["num_class"].c_str()) > 1 && cfg_.count("objective") == 0) {
|
if (atoi(cfg_["num_class"].c_str()) > 1 && cfg_.count("objective") == 0) {
|
||||||
tparam_.objective = "multi:softmax";
|
tparam_.objective = "multi:softmax";
|
||||||
@ -919,7 +945,6 @@ class LearnerImpl : public Learner {
|
|||||||
}
|
}
|
||||||
CHECK_NE(mparam_.num_feature, 0)
|
CHECK_NE(mparam_.num_feature, 0)
|
||||||
<< "0 feature is supplied. Are you using raw Booster interface?";
|
<< "0 feature is supplied. Are you using raw Booster interface?";
|
||||||
learner_model_param_.num_feature = mparam_.num_feature;
|
|
||||||
// Remove these once binary IO is gone.
|
// Remove these once binary IO is gone.
|
||||||
cfg_["num_feature"] = common::ToString(mparam_.num_feature);
|
cfg_["num_feature"] = common::ToString(mparam_.num_feature);
|
||||||
cfg_["num_class"] = common::ToString(mparam_.num_class);
|
cfg_["num_class"] = common::ToString(mparam_.num_class);
|
||||||
|
|||||||
@ -21,8 +21,9 @@ ENV GOSU_VERSION 1.10
|
|||||||
|
|
||||||
# Install Python packages
|
# Install Python packages
|
||||||
RUN \
|
RUN \
|
||||||
pip install pyyaml cpplint pylint astroid sphinx numpy scipy pandas matplotlib sh recommonmark guzzle_sphinx_theme mock \
|
pip install pyyaml cpplint pylint astroid sphinx numpy scipy pandas matplotlib sh \
|
||||||
breathe matplotlib graphviz pytest scikit-learn wheel kubernetes urllib3 jsonschema && \
|
recommonmark guzzle_sphinx_theme mock breathe matplotlib graphviz \
|
||||||
|
pytest scikit-learn wheel kubernetes urllib3 jsonschema boto3 && \
|
||||||
pip install https://h2o-release.s3.amazonaws.com/datatable/stable/datatable-0.7.0/datatable-0.7.0-cp37-cp37m-linux_x86_64.whl && \
|
pip install https://h2o-release.s3.amazonaws.com/datatable/stable/datatable-0.7.0/datatable-0.7.0-cp37-cp37m-linux_x86_64.whl && \
|
||||||
pip install "dask[complete]"
|
pip install "dask[complete]"
|
||||||
|
|
||||||
|
|||||||
@ -180,6 +180,41 @@ TEST(Learner, JsonModelIO) {
|
|||||||
delete pp_dmat;
|
delete pp_dmat;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST(Learner, BinaryModelIO) {
|
||||||
|
size_t constexpr kRows = 8;
|
||||||
|
int32_t constexpr kIters = 4;
|
||||||
|
auto pp_dmat = CreateDMatrix(kRows, 10, 0);
|
||||||
|
std::shared_ptr<DMatrix> p_dmat {*pp_dmat};
|
||||||
|
p_dmat->Info().labels_.Resize(kRows);
|
||||||
|
|
||||||
|
std::unique_ptr<Learner> learner{Learner::Create({p_dmat})};
|
||||||
|
learner->SetParam("eval_metric", "rmsle");
|
||||||
|
learner->Configure();
|
||||||
|
for (int32_t iter = 0; iter < kIters; ++iter) {
|
||||||
|
learner->UpdateOneIter(iter, p_dmat.get());
|
||||||
|
}
|
||||||
|
dmlc::TemporaryDirectory tempdir;
|
||||||
|
std::string const fname = tempdir.path + "binary_model_io.bin";
|
||||||
|
{
|
||||||
|
// Make sure the write is complete before loading.
|
||||||
|
std::unique_ptr<dmlc::Stream> fo(dmlc::Stream::Create(fname.c_str(), "w"));
|
||||||
|
learner->SaveModel(fo.get());
|
||||||
|
}
|
||||||
|
|
||||||
|
learner.reset(Learner::Create({p_dmat}));
|
||||||
|
std::unique_ptr<dmlc::Stream> fi(dmlc::Stream::Create(fname.c_str(), "r"));
|
||||||
|
learner->LoadModel(fi.get());
|
||||||
|
learner->Configure();
|
||||||
|
Json config { Object() };
|
||||||
|
learner->SaveConfig(&config);
|
||||||
|
std::string config_str;
|
||||||
|
Json::Dump(config, &config_str);
|
||||||
|
ASSERT_NE(config_str.find("rmsle"), std::string::npos);
|
||||||
|
ASSERT_EQ(config_str.find("WARNING"), std::string::npos);
|
||||||
|
|
||||||
|
delete pp_dmat;
|
||||||
|
}
|
||||||
|
|
||||||
#if defined(XGBOOST_USE_CUDA)
|
#if defined(XGBOOST_USE_CUDA)
|
||||||
// Tests for automatic GPU configuration.
|
// Tests for automatic GPU configuration.
|
||||||
TEST(Learner, GPUConfiguration) {
|
TEST(Learner, GPUConfiguration) {
|
||||||
|
|||||||
@ -59,6 +59,29 @@ def generate_regression_model():
|
|||||||
reg.save_model(skl_json('reg'))
|
reg.save_model(skl_json('reg'))
|
||||||
|
|
||||||
|
|
||||||
|
def generate_logistic_model():
|
||||||
|
print('Logistic')
|
||||||
|
y = np.random.randint(0, 2, size=kRows)
|
||||||
|
assert y.max() == 1 and y.min() == 0
|
||||||
|
|
||||||
|
data = xgboost.DMatrix(X, label=y, weight=w)
|
||||||
|
booster = xgboost.train({'tree_method': 'hist',
|
||||||
|
'num_parallel_tree': kForests,
|
||||||
|
'max_depth': kMaxDepth,
|
||||||
|
'objective': 'binary:logistic'},
|
||||||
|
num_boost_round=kRounds, dtrain=data)
|
||||||
|
booster.save_model(booster_bin('logit'))
|
||||||
|
booster.save_model(booster_json('logit'))
|
||||||
|
|
||||||
|
reg = xgboost.XGBClassifier(tree_method='hist',
|
||||||
|
num_parallel_tree=kForests,
|
||||||
|
max_depth=kMaxDepth,
|
||||||
|
n_estimators=kRounds)
|
||||||
|
reg.fit(X, y, w)
|
||||||
|
reg.save_model(skl_bin('logit'))
|
||||||
|
reg.save_model(skl_json('logit'))
|
||||||
|
|
||||||
|
|
||||||
def generate_classification_model():
|
def generate_classification_model():
|
||||||
print('Classification')
|
print('Classification')
|
||||||
y = np.random.randint(0, kClasses, size=kRows)
|
y = np.random.randint(0, kClasses, size=kRows)
|
||||||
@ -83,7 +106,7 @@ def generate_classification_model():
|
|||||||
def generate_ranking_model():
|
def generate_ranking_model():
|
||||||
print('Learning to Rank')
|
print('Learning to Rank')
|
||||||
y = np.random.randint(5, size=kRows)
|
y = np.random.randint(5, size=kRows)
|
||||||
w = np.random.randn(20)
|
w = np.random.uniform(size=20)
|
||||||
g = np.repeat(50, 20)
|
g = np.repeat(50, 20)
|
||||||
|
|
||||||
data = xgboost.DMatrix(X, y, weight=w)
|
data = xgboost.DMatrix(X, y, weight=w)
|
||||||
@ -119,6 +142,7 @@ if __name__ == '__main__':
|
|||||||
os.mkdir(target_dir)
|
os.mkdir(target_dir)
|
||||||
|
|
||||||
generate_regression_model()
|
generate_regression_model()
|
||||||
|
generate_logistic_model()
|
||||||
generate_classification_model()
|
generate_classification_model()
|
||||||
generate_ranking_model()
|
generate_ranking_model()
|
||||||
write_versions()
|
write_versions()
|
||||||
|
|||||||
@ -1 +0,0 @@
|
|||||||
{'numpy': '1.16.4', 'xgboost': '1.0.0-SNAPSHOT'}
|
|
||||||
Binary file not shown.
File diff suppressed because one or more lines are too long
Binary file not shown.
@ -1 +0,0 @@
|
|||||||
{"learner":{"attributes":{},"gradient_booster":{"model":{"gbtree_model_param":{"num_trees":"4","size_leaf_vector":"0"},"tree_info":[0,0,0,0],"trees":[{"base_weights":[2.18596185597164094e-09,-3.76773595809936523e-01,4.55630868673324585e-02,1.12075649201869965e-01,-1.93485423922538757e-01],"default_left":[false,false,false,false,false],"id":0,"leaf_child_counts":[1,0,2,0,0],"left_children":[1,-1,3,-1,-1],"loss_changes":[4.20947641134262085e-01,0.00000000000000000e+00,3.69498044252395630e-01,5.97973287105560303e-01,6.13317489624023438e-01],"parents":[2147483647,0,0,2,2],"right_children":[2,-1,4,-1,-1],"split_conditions":[-1.45796775817871094e+00,-5.65160401165485382e-02,8.68250608444213867e-01,1.68113484978675842e-02,-2.90228147059679031e-02],"split_indices":[3,0,1,0,0],"sum_hessian":[2.25207920074462891e+01,1.64538443088531494e+00,2.08754062652587891e+01,1.67469234466552734e+01,4.12848377227783203e+00],"tree_param":{"num_feature":"4","num_nodes":"5","size_leaf_vector":"0"}},{"base_weights":[2.18596185597164094e-09,-3.76773595809936523e-01,4.55630868673324585e-02,1.12075649201869965e-01,-1.93485423922538757e-01],"default_left":[false,false,false,false,false],"id":1,"leaf_child_counts":[1,0,2,0,0],"left_children":[1,-1,3,-1,-1],"loss_changes":[4.20947641134262085e-01,0.00000000000000000e+00,3.69498044252395630e-01,5.97973287105560303e-01,6.13317489624023438e-01],"parents":[2147483647,0,0,2,2],"right_children":[2,-1,4,-1,-1],"split_conditions":[-1.45796775817871094e+00,-5.65160401165485382e-02,8.68250608444213867e-01,1.68113484978675842e-02,-2.90228147059679031e-02],"split_indices":[3,0,1,0,0],"sum_hessian":[2.25207920074462891e+01,1.64538443088531494e+00,2.08754062652587891e+01,1.67469234466552734e+01,4.12848377227783203e+00],"tree_param":{"num_feature":"4","num_nodes":"5","size_leaf_vector":"0"}},{"base_weights":[2.31542762740843955e-09,-1.12662151455879211e-01,3.53309124708175659e-01,-4.52967911958694458e-01,-4.28877249360084534e-02,-1.19008123874664307e-01,4.9823150
0387191772e-01],"default_left":[false,false,false,false,false,false,false],"id":2,"leaf_child_counts":[0,2,2,0,0,0,0],"left_children":[1,3,5,-1,-1,-1,-1],"loss_changes":[1.03438735008239746e+00,4.48428511619567871e-01,4.89362835884094238e-01,0.00000000000000000e+00,2.74164468050003052e-01,0.00000000000000000e+00,0.00000000000000000e+00],"parents":[2147483647,0,0,1,1,2,2],"right_children":[2,4,6,-1,-1,-1,-1],"split_conditions":[5.69312453269958496e-01,-1.49666213989257812e+00,-3.32068562507629395e-01,-6.79451897740364075e-02,-6.43315911293029785e-03,-1.78512185811996460e-02,7.47347250580787659e-02],"split_indices":[1,1,0,0,0,0,0],"sum_hessian":[2.39866485595703125e+01,1.87036170959472656e+01,5.28303003311157227e+00,2.24795222282409668e+00,1.64556655883789062e+01,1.28239238262176514e+00,4.00063753128051758e+00],"tree_param":{"num_feature":"4","num_nodes":"7","size_leaf_vector":"0"}},{"base_weights":[2.31542762740843955e-09,-1.12662151455879211e-01,3.53309124708175659e-01,-4.52967911958694458e-01,-4.28877249360084534e-02,-1.19008123874664307e-01,4.98231500387191772e-01],"default_left":[false,false,false,false,false,false,false],"id":3,"leaf_child_counts":[0,2,2,0,0,0,0],"left_children":[1,3,5,-1,-1,-1,-1],"loss_changes":[1.03438735008239746e+00,4.48428511619567871e-01,4.89362835884094238e-01,0.00000000000000000e+00,2.74164468050003052e-01,0.00000000000000000e+00,0.00000000000000000e+00],"parents":[2147483647,0,0,1,1,2,2],"right_children":[2,4,6,-1,-1,-1,-1],"split_conditions":[5.69312453269958496e-01,-1.49666213989257812e+00,-3.32068562507629395e-01,-6.79451897740364075e-02,-6.43315911293029785e-03,-1.78512185811996460e-02,7.47347250580787659e-02],"split_indices":[1,1,0,0,0,0,0],"sum_hessian":[2.39866485595703125e+01,1.87036170959472656e+01,5.28303003311157227e+00,2.24795222282409668e+00,1.64556655883789062e+01,1.28239238262176514e+00,4.00063753128051758e+00],"tree_param":{"num_feature":"4","num_nodes":"7","size_leaf_vector":"0"}}]},"name":"gbtree"},"learner_model_para
m":{"base_score":"0.500000","num_class":"0","num_feature":"4"},"objective":{"lambda_rank_param":{"fix_list_weight":"0","num_pairsample":"1"},"name":"rank:ndcg"}},"version":[1,0,0]}
|
|
||||||
Binary file not shown.
@ -1 +0,0 @@
|
|||||||
{"learner":{"attributes":{},"gradient_booster":{"model":{"gbtree_model_param":{"num_trees":"4","size_leaf_vector":"0"},"tree_info":[0,0,0,0],"trees":[{"base_weights":[-5.37645816802978516e-01,-4.36891138553619385e-01,-6.70873284339904785e-01,-1.25496864318847656e+00,-4.07270163297653198e-01,-6.88224375247955322e-01,4.64901357889175415e-01],"default_left":[false,false,false,false,false,false,false],"id":0,"leaf_child_counts":[0,2,2,0,0,0,0],"left_children":[1,3,5,-1,-1,-1,-1],"loss_changes":[6.49523925781250000e+00,6.53602600097656250e+00,4.57461547851562500e+00,2.30323791503906250e-01,6.39891815185546875e+00,4.40366363525390625e+00,2.28362298011779785e+00],"parents":[2147483647,0,0,1,1,2,2],"right_children":[2,4,6,-1,-1,-1,-1],"split_conditions":[1.89942225813865662e-01,-1.81951093673706055e+00,2.12066125869750977e+00,-1.88245311379432678e-01,-6.10905252397060394e-02,-1.03233657777309418e-01,6.97352066636085510e-02],"split_indices":[1,0,0,0,0,0,0],"sum_hessian":[5.04713470458984375e+02,2.89816162109375000e+02,2.14897293090820312e+02,8.68150043487548828e+00,2.81134674072265625e+02,2.12051849365234375e+02,2.84543561935424805e+00],"tree_param":{"num_feature":"4","num_nodes":"7","size_leaf_vector":"0"}},{"base_weights":[-5.37645816802978516e-01,-4.36891138553619385e-01,-6.70873284339904785e-01,-1.25496864318847656e+00,-4.07270163297653198e-01,-6.88224375247955322e-01,4.64901357889175415e-01],"default_left":[false,false,false,false,false,false,false],"id":1,"leaf_child_counts":[0,2,2,0,0,0,0],"left_children":[1,3,5,-1,-1,-1,-1],"loss_changes":[6.49523925781250000e+00,6.53602600097656250e+00,4.57461547851562500e+00,2.30323791503906250e-01,6.39891815185546875e+00,4.40366363525390625e+00,2.28362298011779785e+00],"parents":[2147483647,0,0,1,1,2,2],"right_children":[2,4,6,-1,-1,-1,-1],"split_conditions":[1.89942225813865662e-01,-1.81951093673706055e+00,2.12066125869750977e+00,-1.88245311379432678e-01,-6.10905252397060394e-02,-1.03233657777309418e-01,6.97352066636085510e-02],"
split_indices":[1,0,0,0,0,0,0],"sum_hessian":[5.04713470458984375e+02,2.89816162109375000e+02,2.14897293090820312e+02,8.68150043487548828e+00,2.81134674072265625e+02,2.12051849365234375e+02,2.84543561935424805e+00],"tree_param":{"num_feature":"4","num_nodes":"7","size_leaf_vector":"0"}},{"base_weights":[-3.77470612525939941e-01,3.31088960170745850e-01,-3.92237067222595215e-01,8.17872881889343262e-01,1.18046358227729797e-01,-3.00728023052215576e-01,-4.70518797636032104e-01],"default_left":[false,false,false,false,false,false,false],"id":2,"leaf_child_counts":[0,2,2,0,0,0,0],"left_children":[1,3,5,-1,-1,-1,-1],"loss_changes":[5.42109680175781250e+00,1.03034389019012451e+00,3.41049194335937500e+00,0.00000000000000000e+00,1.19803142547607422e+00,4.23731803894042969e+00,4.69757843017578125e+00],"parents":[2147483647,0,0,1,1,2,2],"right_children":[2,4,6,-1,-1,-1,-1],"split_conditions":[-2.07929229736328125e+00,-5.09094715118408203e-01,-8.72411578893661499e-02,1.22680939733982086e-01,1.77069548517465591e-02,-4.51092049479484558e-02,-7.05778226256370544e-02],"split_indices":[3,0,3,0,0,0,0],"sum_hessian":[5.04713470458984375e+02,9.86623668670654297e+00,4.94847229003906250e+02,2.13924217224121094e+00,7.72699451446533203e+00,2.30380615234375000e+02,2.64466613769531250e+02],"tree_param":{"num_feature":"4","num_nodes":"7","size_leaf_vector":"0"}},{"base_weights":[-3.77470612525939941e-01,3.31088960170745850e-01,-3.92237067222595215e-01,8.17872881889343262e-01,1.18046358227729797e-01,-3.00728023052215576e-01,-4.70518797636032104e-01],"default_left":[false,false,false,false,false,false,false],"id":3,"leaf_child_counts":[0,2,2,0,0,0,0],"left_children":[1,3,5,-1,-1,-1,-1],"loss_changes":[5.42109680175781250e+00,1.03034389019012451e+00,3.41049194335937500e+00,0.00000000000000000e+00,1.19803142547607422e+00,4.23731803894042969e+00,4.69757843017578125e+00],"parents":[2147483647,0,0,1,1,2,2],"right_children":[2,4,6,-1,-1,-1,-1],"split_conditions":[-2.07929229736328125e+00,-5.09094715118
408203e-01,-8.72411578893661499e-02,1.22680939733982086e-01,1.77069548517465591e-02,-4.51092049479484558e-02,-7.05778226256370544e-02],"split_indices":[3,0,3,0,0,0,0],"sum_hessian":[5.04713470458984375e+02,9.86623668670654297e+00,4.94847229003906250e+02,2.13924217224121094e+00,7.72699451446533203e+00,2.30380615234375000e+02,2.64466613769531250e+02],"tree_param":{"num_feature":"4","num_nodes":"7","size_leaf_vector":"0"}}]},"name":"gbtree"},"learner_model_param":{"base_score":"0.500000","num_class":"0","num_feature":"4"},"objective":{"name":"reg:squarederror","reg_loss_param":{"scale_pos_weight":"1"}}},"version":[1,0,0]}
|
|
||||||
Binary file not shown.
File diff suppressed because one or more lines are too long
Binary file not shown.
@ -1 +0,0 @@
|
|||||||
{"learner":{"attributes":{"scikit_learn":"{\"n_estimators\": 2, \"objective\": \"rank:ndcg\", \"max_depth\": 2, \"learning_rate\": null, \"verbosity\": null, \"booster\": null, \"tree_method\": \"hist\", \"gamma\": null, \"min_child_weight\": null, \"max_delta_step\": null, \"subsample\": null, \"colsample_bytree\": null, \"colsample_bylevel\": null, \"colsample_bynode\": null, \"reg_alpha\": null, \"reg_lambda\": null, \"scale_pos_weight\": null, \"base_score\": null, \"missing\": NaN, \"num_parallel_tree\": 2, \"kwargs\": {}, \"random_state\": null, \"n_jobs\": null, \"monotone_constraints\": null, \"interaction_constraints\": null, \"importance_type\": \"gain\", \"gpu_id\": null, \"type\": \"XGBRanker\"}"},"gradient_booster":{"model":{"gbtree_model_param":{"num_trees":"4","size_leaf_vector":"0"},"tree_info":[0,0,0,0],"trees":[{"base_weights":[2.18596185597164094e-09,-3.76773595809936523e-01,4.55630868673324585e-02,1.12075649201869965e-01,-1.93485423922538757e-01],"default_left":[false,false,false,false,false],"id":0,"leaf_child_counts":[1,0,2,0,0],"left_children":[1,-1,3,-1,-1],"loss_changes":[4.20947641134262085e-01,0.00000000000000000e+00,3.69498044252395630e-01,5.97973287105560303e-01,6.13317489624023438e-01],"parents":[2147483647,0,0,2,2],"right_children":[2,-1,4,-1,-1],"split_conditions":[-1.45796775817871094e+00,-5.65160401165485382e-02,8.68250608444213867e-01,1.68113484978675842e-02,-2.90228147059679031e-02],"split_indices":[3,0,1,0,0],"sum_hessian":[2.25207920074462891e+01,1.64538443088531494e+00,2.08754062652587891e+01,1.67469234466552734e+01,4.12848377227783203e+00],"tree_param":{"num_feature":"4","num_nodes":"5","size_leaf_vector":"0"}},{"base_weights":[2.18596185597164094e-09,-3.76773595809936523e-01,4.55630868673324585e-02,1.12075649201869965e-01,-1.93485423922538757e-01],"default_left":[false,false,false,false,false],"id":1,"leaf_child_counts":[1,0,2,0,0],"left_children":[1,-1,3,-1,-1],"loss_changes":[4.20947641134262085e-01,0.00000000000000000e+00,
3.69498044252395630e-01,5.97973287105560303e-01,6.13317489624023438e-01],"parents":[2147483647,0,0,2,2],"right_children":[2,-1,4,-1,-1],"split_conditions":[-1.45796775817871094e+00,-5.65160401165485382e-02,8.68250608444213867e-01,1.68113484978675842e-02,-2.90228147059679031e-02],"split_indices":[3,0,1,0,0],"sum_hessian":[2.25207920074462891e+01,1.64538443088531494e+00,2.08754062652587891e+01,1.67469234466552734e+01,4.12848377227783203e+00],"tree_param":{"num_feature":"4","num_nodes":"5","size_leaf_vector":"0"}},{"base_weights":[2.31542762740843955e-09,-1.12662151455879211e-01,3.53309124708175659e-01,-4.52967911958694458e-01,-4.28877249360084534e-02,-1.19008123874664307e-01,4.98231500387191772e-01],"default_left":[false,false,false,false,false,false,false],"id":2,"leaf_child_counts":[0,2,2,0,0,0,0],"left_children":[1,3,5,-1,-1,-1,-1],"loss_changes":[1.03438735008239746e+00,4.48428511619567871e-01,4.89362835884094238e-01,0.00000000000000000e+00,2.74164468050003052e-01,0.00000000000000000e+00,0.00000000000000000e+00],"parents":[2147483647,0,0,1,1,2,2],"right_children":[2,4,6,-1,-1,-1,-1],"split_conditions":[5.69312453269958496e-01,-1.49666213989257812e+00,-3.32068562507629395e-01,-6.79451897740364075e-02,-6.43315911293029785e-03,-1.78512185811996460e-02,7.47347250580787659e-02],"split_indices":[1,1,0,0,0,0,0],"sum_hessian":[2.39866485595703125e+01,1.87036170959472656e+01,5.28303003311157227e+00,2.24795222282409668e+00,1.64556655883789062e+01,1.28239238262176514e+00,4.00063753128051758e+00],"tree_param":{"num_feature":"4","num_nodes":"7","size_leaf_vector":"0"}},{"base_weights":[2.31542762740843955e-09,-1.12662151455879211e-01,3.53309124708175659e-01,-4.52967911958694458e-01,-4.28877249360084534e-02,-1.19008123874664307e-01,4.98231500387191772e-01],"default_left":[false,false,false,false,false,false,false],"id":3,"leaf_child_counts":[0,2,2,0,0,0,0],"left_children":[1,3,5,-1,-1,-1,-1],"loss_changes":[1.03438735008239746e+00,4.48428511619567871e-01,4.89362835884094238e-01
,0.00000000000000000e+00,2.74164468050003052e-01,0.00000000000000000e+00,0.00000000000000000e+00],"parents":[2147483647,0,0,1,1,2,2],"right_children":[2,4,6,-1,-1,-1,-1],"split_conditions":[5.69312453269958496e-01,-1.49666213989257812e+00,-3.32068562507629395e-01,-6.79451897740364075e-02,-6.43315911293029785e-03,-1.78512185811996460e-02,7.47347250580787659e-02],"split_indices":[1,1,0,0,0,0,0],"sum_hessian":[2.39866485595703125e+01,1.87036170959472656e+01,5.28303003311157227e+00,2.24795222282409668e+00,1.64556655883789062e+01,1.28239238262176514e+00,4.00063753128051758e+00],"tree_param":{"num_feature":"4","num_nodes":"7","size_leaf_vector":"0"}}]},"name":"gbtree"},"learner_model_param":{"base_score":"0.500000","num_class":"0","num_feature":"4"},"objective":{"lambda_rank_param":{"fix_list_weight":"0","num_pairsample":"1"},"name":"rank:ndcg"}},"version":[1,0,0]}
|
|
||||||
Binary file not shown.
File diff suppressed because one or more lines are too long
@ -39,7 +39,7 @@ class TestBasic(unittest.TestCase):
|
|||||||
def test_basic(self):
|
def test_basic(self):
|
||||||
dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train')
|
dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train')
|
||||||
dtest = xgb.DMatrix(dpath + 'agaricus.txt.test')
|
dtest = xgb.DMatrix(dpath + 'agaricus.txt.test')
|
||||||
param = {'max_depth': 2, 'eta': 1, 'verbosity': 0,
|
param = {'max_depth': 2, 'eta': 1,
|
||||||
'objective': 'binary:logistic'}
|
'objective': 'binary:logistic'}
|
||||||
# specify validations set to watch performance
|
# specify validations set to watch performance
|
||||||
watchlist = [(dtest, 'eval'), (dtrain, 'train')]
|
watchlist = [(dtest, 'eval'), (dtrain, 'train')]
|
||||||
|
|||||||
@ -284,16 +284,31 @@ class TestModels(unittest.TestCase):
|
|||||||
self.assertRaises(ValueError, bst.predict, dm1)
|
self.assertRaises(ValueError, bst.predict, dm1)
|
||||||
bst.predict(dm2) # success
|
bst.predict(dm2) # success
|
||||||
|
|
||||||
|
def test_model_binary_io(self):
|
||||||
|
model_path = 'test_model_binary_io.bin'
|
||||||
|
parameters = {'tree_method': 'hist', 'booster': 'gbtree',
|
||||||
|
'scale_pos_weight': '0.5'}
|
||||||
|
X = np.random.random((10, 3))
|
||||||
|
y = np.random.random((10,))
|
||||||
|
dtrain = xgb.DMatrix(X, y)
|
||||||
|
bst = xgb.train(parameters, dtrain, num_boost_round=2)
|
||||||
|
bst.save_model(model_path)
|
||||||
|
bst = xgb.Booster(model_file=model_path)
|
||||||
|
os.remove(model_path)
|
||||||
|
config = json.loads(bst.save_config())
|
||||||
|
assert float(config['learner']['objective'][
|
||||||
|
'reg_loss_param']['scale_pos_weight']) == 0.5
|
||||||
|
|
||||||
def test_model_json_io(self):
|
def test_model_json_io(self):
|
||||||
model_path = './model.json'
|
model_path = 'test_model_json_io.json'
|
||||||
parameters = {'tree_method': 'hist', 'booster': 'gbtree'}
|
parameters = {'tree_method': 'hist', 'booster': 'gbtree'}
|
||||||
j_model = json_model(model_path, parameters)
|
j_model = json_model(model_path, parameters)
|
||||||
assert isinstance(j_model['learner'], dict)
|
assert isinstance(j_model['learner'], dict)
|
||||||
|
|
||||||
bst = xgb.Booster(model_file='./model.json')
|
bst = xgb.Booster(model_file=model_path)
|
||||||
|
|
||||||
bst.save_model(fname=model_path)
|
bst.save_model(fname=model_path)
|
||||||
with open('./model.json', 'r') as fd:
|
with open(model_path, 'r') as fd:
|
||||||
j_model = json.load(fd)
|
j_model = json.load(fd)
|
||||||
assert isinstance(j_model['learner'], dict)
|
assert isinstance(j_model['learner'], dict)
|
||||||
|
|
||||||
@ -302,7 +317,7 @@ class TestModels(unittest.TestCase):
|
|||||||
@pytest.mark.skipif(**tm.no_json_schema())
|
@pytest.mark.skipif(**tm.no_json_schema())
|
||||||
def test_json_schema(self):
|
def test_json_schema(self):
|
||||||
import jsonschema
|
import jsonschema
|
||||||
model_path = './model.json'
|
model_path = 'test_json_schema.json'
|
||||||
path = os.path.dirname(
|
path = os.path.dirname(
|
||||||
os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
doc = os.path.join(path, 'doc', 'model.schema')
|
doc = os.path.join(path, 'doc', 'model.schema')
|
||||||
|
|||||||
@ -1,47 +1,130 @@
|
|||||||
import xgboost
|
import xgboost
|
||||||
import os
|
import os
|
||||||
import generate_models as gm
|
import generate_models as gm
|
||||||
|
import json
|
||||||
|
import zipfile
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
|
||||||
def test_model_compability():
|
def run_model_param_check(config):
|
||||||
|
assert config['learner']['learner_model_param']['num_feature'] == str(4)
|
||||||
|
assert config['learner']['learner_train_param']['booster'] == 'gbtree'
|
||||||
|
|
||||||
|
|
||||||
|
def run_booster_check(booster, name):
|
||||||
|
config = json.loads(booster.save_config())
|
||||||
|
run_model_param_check(config)
|
||||||
|
if name.find('cls') != -1:
|
||||||
|
assert (len(booster.get_dump()) == gm.kForests * gm.kRounds *
|
||||||
|
gm.kClasses)
|
||||||
|
assert float(
|
||||||
|
config['learner']['learner_model_param']['base_score']) == 0.5
|
||||||
|
assert config['learner']['learner_train_param'][
|
||||||
|
'objective'] == 'multi:softmax'
|
||||||
|
elif name.find('logit') != -1:
|
||||||
|
assert len(booster.get_dump()) == gm.kForests * gm.kRounds
|
||||||
|
assert config['learner']['learner_model_param']['num_class'] == str(0)
|
||||||
|
assert config['learner']['learner_train_param'][
|
||||||
|
'objective'] == 'binary:logistic'
|
||||||
|
elif name.find('ltr') != -1:
|
||||||
|
assert config['learner']['learner_train_param'][
|
||||||
|
'objective'] == 'rank:ndcg'
|
||||||
|
else:
|
||||||
|
assert name.find('reg') != -1
|
||||||
|
assert len(booster.get_dump()) == gm.kForests * gm.kRounds
|
||||||
|
assert float(
|
||||||
|
config['learner']['learner_model_param']['base_score']) == 0.5
|
||||||
|
assert config['learner']['learner_train_param'][
|
||||||
|
'objective'] == 'reg:squarederror'
|
||||||
|
|
||||||
|
|
||||||
|
def run_scikit_model_check(name, path):
|
||||||
|
if name.find('reg') != -1:
|
||||||
|
reg = xgboost.XGBRegressor()
|
||||||
|
reg.load_model(path)
|
||||||
|
config = json.loads(reg.get_booster().save_config())
|
||||||
|
if name.find('0.90') != -1:
|
||||||
|
assert config['learner']['learner_train_param'][
|
||||||
|
'objective'] == 'reg:linear'
|
||||||
|
else:
|
||||||
|
assert config['learner']['learner_train_param'][
|
||||||
|
'objective'] == 'reg:squarederror'
|
||||||
|
assert (len(reg.get_booster().get_dump()) ==
|
||||||
|
gm.kRounds * gm.kForests)
|
||||||
|
run_model_param_check(config)
|
||||||
|
elif name.find('cls') != -1:
|
||||||
|
cls = xgboost.XGBClassifier()
|
||||||
|
cls.load_model(path)
|
||||||
|
if name.find('0.90') == -1:
|
||||||
|
assert len(cls.classes_) == gm.kClasses
|
||||||
|
assert len(cls._le.classes_) == gm.kClasses
|
||||||
|
assert cls.n_classes_ == gm.kClasses
|
||||||
|
assert (len(cls.get_booster().get_dump()) ==
|
||||||
|
gm.kRounds * gm.kForests * gm.kClasses), path
|
||||||
|
config = json.loads(cls.get_booster().save_config())
|
||||||
|
assert config['learner']['learner_train_param'][
|
||||||
|
'objective'] == 'multi:softprob', path
|
||||||
|
run_model_param_check(config)
|
||||||
|
elif name.find('ltr') != -1:
|
||||||
|
ltr = xgboost.XGBRanker()
|
||||||
|
ltr.load_model(path)
|
||||||
|
assert (len(ltr.get_booster().get_dump()) ==
|
||||||
|
gm.kRounds * gm.kForests)
|
||||||
|
config = json.loads(ltr.get_booster().save_config())
|
||||||
|
assert config['learner']['learner_train_param'][
|
||||||
|
'objective'] == 'rank:ndcg'
|
||||||
|
run_model_param_check(config)
|
||||||
|
elif name.find('logit') != -1:
|
||||||
|
logit = xgboost.XGBClassifier()
|
||||||
|
logit.load_model(path)
|
||||||
|
assert (len(logit.get_booster().get_dump()) ==
|
||||||
|
gm.kRounds * gm.kForests)
|
||||||
|
config = json.loads(logit.get_booster().save_config())
|
||||||
|
assert config['learner']['learner_train_param'][
|
||||||
|
'objective'] == 'binary:logistic'
|
||||||
|
else:
|
||||||
|
assert False
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.ci
|
||||||
|
def test_model_compatibility():
|
||||||
|
'''Test model compatibility, can only be run on CI as others don't
|
||||||
|
have the credentials.
|
||||||
|
|
||||||
|
'''
|
||||||
path = os.path.dirname(os.path.abspath(__file__))
|
path = os.path.dirname(os.path.abspath(__file__))
|
||||||
path = os.path.join(path, 'models')
|
path = os.path.join(path, 'models')
|
||||||
|
try:
|
||||||
|
import boto3
|
||||||
|
import botocore
|
||||||
|
except ImportError:
|
||||||
|
pytest.skip(
|
||||||
|
'Skiping compatibility tests as boto3 is not installed.')
|
||||||
|
|
||||||
|
try:
|
||||||
|
s3_bucket = boto3.resource('s3').Bucket('xgboost-ci-jenkins-artifacts')
|
||||||
|
zip_path = 'xgboost_model_compatibility_test.zip'
|
||||||
|
s3_bucket.download_file(zip_path, zip_path)
|
||||||
|
except botocore.exceptions.NoCredentialsError:
|
||||||
|
pytest.skip(
|
||||||
|
'Skiping compatibility tests as running on non-CI environment.')
|
||||||
|
|
||||||
|
with zipfile.ZipFile(zip_path, 'r') as z:
|
||||||
|
z.extractall(path)
|
||||||
|
|
||||||
models = [
|
models = [
|
||||||
os.path.join(root, f) for root, subdir, files in os.walk(path)
|
os.path.join(root, f) for root, subdir, files in os.walk(path)
|
||||||
for f in files
|
for f in files
|
||||||
if f != 'version'
|
if f != 'version'
|
||||||
]
|
]
|
||||||
assert len(models) == 12
|
assert models
|
||||||
|
|
||||||
for path in models:
|
for path in models:
|
||||||
name = os.path.basename(path)
|
name = os.path.basename(path)
|
||||||
if name.startswith('xgboost-'):
|
if name.startswith('xgboost-'):
|
||||||
booster = xgboost.Booster(model_file=path)
|
booster = xgboost.Booster(model_file=path)
|
||||||
if name.find('cls') != -1:
|
run_booster_check(booster, name)
|
||||||
assert (len(booster.get_dump()) ==
|
|
||||||
gm.kForests * gm.kRounds * gm.kClasses)
|
|
||||||
else:
|
|
||||||
assert len(booster.get_dump()) == gm.kForests * gm.kRounds
|
|
||||||
elif name.startswith('xgboost_scikit'):
|
elif name.startswith('xgboost_scikit'):
|
||||||
if name.find('reg') != -1:
|
run_scikit_model_check(name, path)
|
||||||
reg = xgboost.XGBRegressor()
|
|
||||||
reg.load_model(path)
|
|
||||||
assert (len(reg.get_booster().get_dump()) ==
|
|
||||||
gm.kRounds * gm.kForests)
|
|
||||||
elif name.find('cls') != -1:
|
|
||||||
cls = xgboost.XGBClassifier()
|
|
||||||
cls.load_model(path)
|
|
||||||
assert len(cls.classes_) == gm.kClasses
|
|
||||||
assert len(cls._le.classes_) == gm.kClasses
|
|
||||||
assert cls.n_classes_ == gm.kClasses
|
|
||||||
assert (len(cls.get_booster().get_dump()) ==
|
|
||||||
gm.kRounds * gm.kForests * gm.kClasses), path
|
|
||||||
elif name.find('ltr') != -1:
|
|
||||||
ltr = xgboost.XGBRanker()
|
|
||||||
ltr.load_model(path)
|
|
||||||
assert (len(ltr.get_booster().get_dump()) ==
|
|
||||||
gm.kRounds * gm.kForests)
|
|
||||||
else:
|
|
||||||
assert False
|
|
||||||
else:
|
else:
|
||||||
assert False
|
assert False
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user