Merge model compatibility fixes from 1.0rc branch. (#5305)

* Port test model compatibility.
* Port logit model fix.

https://github.com/dmlc/xgboost/pull/5248
https://github.com/dmlc/xgboost/pull/5281

parent 29eeea709a
commit 911a902835
@@ -49,7 +49,7 @@ option(USE_SANITIZER "Use santizer flags" OFF)
 option(SANITIZER_PATH "Path to sanitizes.")
 set(ENABLED_SANITIZERS "address" "leak" CACHE STRING
   "Semicolon separated list of sanitizer names. E.g 'address;leak'. Supported sanitizers are
-  address, leak and thread.")
+  address, leak, undefined and thread.")
 ## Plugins
 option(PLUGIN_LZ4 "Build lz4 plugin" OFF)
 option(PLUGIN_DENSE_PARSER "Build dense parser plugin" OFF)
@@ -139,6 +139,8 @@ xgb.Booster.complete <- function(object, saveraw = TRUE) {
 #' @param reshape whether to reshape the vector of predictions to a matrix form when there are several
 #'   prediction outputs per case. This option has no effect when either of predleaf, predcontrib,
 #'   or predinteraction flags is TRUE.
+#' @param training whether is the prediction result used for training. For dart booster,
+#'   training predicting will perform dropout.
 #' @param ... Parameters passed to \code{predict.xgb.Booster}
 #'
 #' @details
@@ -49,6 +49,9 @@ It will use all the trees by default (\code{NULL} value).}
 prediction outputs per case. This option has no effect when either of predleaf, predcontrib,
 or predinteraction flags is TRUE.}

+\item{training}{whether is the prediction result used for training. For dart booster,
+training predicting will perform dropout.}
+
 \item{...}{Parameters passed to \code{predict.xgb.Booster}}
 }
 \value{
@@ -31,7 +31,6 @@ num_round <- 2
 test_that("custom objective works", {
   bst <- xgb.train(param, dtrain, num_round, watchlist)
   expect_equal(class(bst), "xgb.Booster")
-  expect_equal(length(bst$raw), 1100)
   expect_false(is.null(bst$evaluation_log))
   expect_false(is.null(bst$evaluation_log$eval_error))
   expect_lt(bst$evaluation_log[num_round, eval_error], 0.03)
@@ -58,5 +57,4 @@ test_that("custom objective using DMatrix attr works", {
   param$objective = logregobjattr
   bst <- xgb.train(param, dtrain, num_round, watchlist)
   expect_equal(class(bst), "xgb.Booster")
-  expect_equal(length(bst$raw), 1100)
 })
doc/python/convert_090to100.py (new file, 79 lines)
@@ -0,0 +1,79 @@
'''This is a simple script that converts a pickled XGBoost
Scikit-Learn interface object from 0.90 to a native model.  Pickle
format is not stable as it's a direct serialization of Python object.
We advice not to use it when stability is needed.

'''
import pickle
import json
import os
import argparse
import numpy as np
import xgboost
import warnings


def save_label_encoder(le):
    '''Save the label encoder in XGBClassifier'''
    meta = dict()
    for k, v in le.__dict__.items():
        if isinstance(v, np.ndarray):
            meta[k] = v.tolist()
        else:
            meta[k] = v
    return meta


def xgboost_skl_90to100(skl_model):
    '''Extract the model and related metadata in SKL model.'''
    model = {}
    with open(skl_model, 'rb') as fd:
        old = pickle.load(fd)
    if not isinstance(old, xgboost.XGBModel):
        raise TypeError(
            'The script only handes Scikit-Learn interface object')

    # Save Scikit-Learn specific Python attributes into a JSON document.
    for k, v in old.__dict__.items():
        if k == '_le':
            model[k] = save_label_encoder(v)
        elif k == 'classes_':
            model[k] = v.tolist()
        elif k == '_Booster':
            continue
        else:
            try:
                json.dumps({k: v})
                model[k] = v
            except TypeError:
                warnings.warn(str(k) + ' is not saved in Scikit-Learn meta.')
    booster = old.get_booster()
    # Store the JSON serialization as an attribute
    booster.set_attr(scikit_learn=json.dumps(model))

    # Save it into a native model.
    i = 0
    while True:
        path = 'xgboost_native_model_from_' + skl_model + '-' + str(i) + '.bin'
        if os.path.exists(path):
            i += 1
            continue
        booster.save_model(path)
        break


if __name__ == '__main__':
    assert xgboost.__version__ != '1.0.0', ('Please use the XGBoost version'
                                            ' that generates this pickle.')
    parser = argparse.ArgumentParser(
        description=('A simple script to convert pickle generated by'
                     ' XGBoost 0.90 to XGBoost 1.0.0 model (not pickle).'))
    parser.add_argument(
        '--old-pickle',
        type=str,
        help='Path to old pickle file of Scikit-Learn interface object. '
        'Will output a native model converted from this pickle file',
        required=True)
    args = parser.parse_args()

    xgboost_skl_90to100(args.old_pickle)
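For orientation, a round trip with this converter looks roughly like the sketch below. This is not part of the commit: the pickle and output file names are illustrative, and step 1 must run under the same XGBoost 0.90 environment that produced the pickle.

    # Step 1 (under XGBoost 0.90, illustrative file names):
    #   python convert_090to100.py --old-pickle old_skl_model.pkl
    # The loop above would write e.g.
    #   xgboost_native_model_from_old_skl_model.pkl-0.bin
    # Step 2 (under XGBoost 1.0): load the native model back.
    import json
    import xgboost

    booster = xgboost.Booster(
        model_file='xgboost_native_model_from_old_skl_model.pkl-0.bin')
    # The converter stashed the Scikit-Learn attributes as a JSON string.
    meta = json.loads(booster.attr('scikit_learn'))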
@@ -91,7 +91,12 @@ Loading pickled file from different version of XGBoost

 As noted, pickled model is neither portable nor stable, but in some cases the pickled
 models are valuable.  One way to restore it in the future is to load it back with that
-specific version of Python and XGBoost, export the model by calling `save_model`.
+specific version of Python and XGBoost, export the model by calling `save_model`.  To help
+easing the mitigation, we created a simple script for converting pickled XGBoost 0.90
+Scikit-Learn interface object to XGBoost 1.0.0 native model.  Please note that the script
+suits simple use cases, and it's advised not to use pickle when stability is needed.
+It's located in ``xgboost/doc/python`` with the name ``convert_090to100.py``.  See
+comments in the script for more details.

 ********************************************************
 Saving and Loading the internal parameters configuration
@@ -208,6 +208,8 @@ struct LearnerModelParam {
   // As the old `LearnerModelParamLegacy` is still used by binary IO, we keep
   // this one as an immutable copy.
   LearnerModelParam(LearnerModelParamLegacy const& user_param, float base_margin);
+  /* \brief Whether this parameter is initialized with LearnerModelParamLegacy. */
+  bool Initialized() const { return num_feature != 0; }
 };

 }  // namespace xgboost
@@ -600,6 +600,7 @@ class DaskXGBRegressor(DaskScikitLearnBase):
         results = train(self.client, params, dtrain,
                         num_boost_round=self.get_num_boosting_rounds(),
                         evals=evals)
+        # pylint: disable=attribute-defined-outside-init
         self._Booster = results['booster']
         # pylint: disable=attribute-defined-outside-init
         self.evals_result_ = results['history']
@@ -24,7 +24,7 @@ void JsonWriter::Visit(JsonArray const* arr) {
   for (size_t i = 0; i < size; ++i) {
     auto const& value = vec[i];
     this->Save(value);
-    if (i != size-1) { Write(", "); }
+    if (i != size-1) { Write(","); }
   }
   this->Write("]");
 }
@@ -38,7 +38,7 @@ void JsonWriter::Visit(JsonObject const* obj) {
   size_t size = obj->getObject().size();

   for (auto& value : obj->getObject()) {
-    this->Write("\"" + value.first + "\": ");
+    this->Write("\"" + value.first + "\":");
     this->Save(value.second);

     if (i != size-1) {
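Taken together, the two JsonWriter changes drop the space after the array separator and after the key separator, producing compact output. A Python illustration of the same separator choice (not the C++ writer itself):

    import json

    # Equivalent to the new C++ writer: no spaces around separators.
    print(json.dumps({"a": [1, 2]}, separators=(',', ':')))  # {"a":[1,2]}
    # The old writer behaved like Python's default:
    print(json.dumps({"a": [1, 2]}))                         # {"a": [1, 2]}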
src/learner.cc (193 lines changed)
@@ -67,19 +67,26 @@ struct LearnerModelParamLegacy : public dmlc::Parameter<LearnerModelParamLegacy>
   /* \brief global bias */
   bst_float base_score;
   /* \brief number of features  */
-  unsigned num_feature;
+  uint32_t num_feature;
   /* \brief number of classes, if it is multi-class classification  */
-  int num_class;
+  int32_t num_class;
   /*! \brief Model contain additional properties */
-  int contain_extra_attrs;
+  int32_t contain_extra_attrs;
   /*! \brief Model contain eval metrics */
-  int contain_eval_metrics;
+  int32_t contain_eval_metrics;
+  /*! \brief the version of XGBoost. */
+  uint32_t major_version;
+  uint32_t minor_version;
   /*! \brief reserved field */
-  int reserved[29];
+  int reserved[27];
   /*! \brief constructor */
   LearnerModelParamLegacy() {
     std::memset(this, 0, sizeof(LearnerModelParamLegacy));
     base_score = 0.5f;
+    major_version = std::get<0>(Version::Self());
+    minor_version = std::get<1>(Version::Self());
+    static_assert(sizeof(LearnerModelParamLegacy) == 136,
+                  "Do not change the size of this struct, as it will break binary IO.");
   }
   // Skip other legacy fields.
   Json ToJson() const {
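A quick check on the numbers in this hunk (my own arithmetic, not part of the commit): the two new uint32_t version fields take the place of two entries of the old int reserved[29], so the struct keeps its 136-byte binary layout:

    # 7 scalar 4-byte fields: base_score, num_feature, num_class,
    # contain_extra_attrs, contain_eval_metrics, major_version, minor_version
    scalars = 7 * 4       # 28 bytes
    reserved = 27 * 4     # 108 bytes
    assert scalars + reserved == 136  # matches the new static_assert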
@@ -117,8 +124,9 @@ LearnerModelParam::LearnerModelParam(
     LearnerModelParamLegacy const &user_param, float base_margin)
     : base_score{base_margin}, num_feature{user_param.num_feature},
       num_output_group{user_param.num_class == 0
                        ? 1
-                       : static_cast<uint32_t>(user_param.num_class)} {}
+                       : static_cast<uint32_t>(user_param.num_class)}
+{}

 struct LearnerTrainParam : public XGBoostParameter<LearnerTrainParam> {
   // data split mode, can be row, col, or none.
@@ -140,7 +148,7 @@ struct LearnerTrainParam : public XGBoostParameter<LearnerTrainParam> {
         .describe("Data split mode for distributed training.");
     DMLC_DECLARE_FIELD(disable_default_eval_metric)
         .set_default(0)
-        .describe("flag to disable default metric. Set to >0 to disable");
+        .describe("Flag to disable default metric. Set to >0 to disable");
     DMLC_DECLARE_FIELD(booster)
         .set_default("gbtree")
         .describe("Gradient booster used for training.");
@@ -200,6 +208,7 @@ class LearnerImpl : public Learner {
     Args args = {cfg_.cbegin(), cfg_.cend()};

     tparam_.UpdateAllowUnknown(args);
+    auto mparam_backup = mparam_;
     mparam_.UpdateAllowUnknown(args);
     generic_parameters_.UpdateAllowUnknown(args);
     generic_parameters_.CheckDeprecated();
@@ -217,17 +226,33 @@ class LearnerImpl : public Learner {

     // set seed only before the model is initialized
     common::GlobalRandom().seed(generic_parameters_.seed);

     // must precede configure gbm since num_features is required for gbm
     this->ConfigureNumFeatures();
     args = {cfg_.cbegin(), cfg_.cend()};  // renew
     this->ConfigureObjective(old_tparam, &args);
-    this->ConfigureGBM(old_tparam, args);
-    this->ConfigureMetrics(args);
+
+    // Before 1.0.0, we save `base_score` into binary as a transformed value by objective.
+    // After 1.0.0 we save the value provided by user and keep it immutable instead.  To
+    // keep the stability, we initialize it in binary LoadModel instead of configuration.
+    // Under what condition should we omit the transformation:
+    //
+    // - base_score is loaded from old binary model.
+    //
+    // What are the other possible conditions:
+    //
+    // - model loaded from new binary or JSON.
+    // - model is created from scratch.
+    // - model is configured second time due to change of parameter
+    if (!learner_model_param_.Initialized() || mparam_.base_score != mparam_backup.base_score) {
+      learner_model_param_ = LearnerModelParam(mparam_,
+                                               obj_->ProbToMargin(mparam_.base_score));
+    }
+
+    this->ConfigureGBM(old_tparam, args);
     generic_parameters_.ConfigureGpuId(this->gbm_->UseGPU());

-    learner_model_param_ = LearnerModelParam(mparam_,
-                             obj_->ProbToMargin(mparam_.base_score));
+    this->ConfigureMetrics(args);

     this->need_configuration_ = false;
     if (generic_parameters_.validate_parameters) {
@@ -337,9 +362,6 @@ class LearnerImpl : public Learner {
                            cache_));
     gbm_->LoadModel(gradient_booster);

-    learner_model_param_ = LearnerModelParam(mparam_,
-                             obj_->ProbToMargin(mparam_.base_score));
-
     auto const& j_attributes = get<Object const>(learner.at("attributes"));
     attributes_.clear();
     for (auto const& kv : j_attributes) {
@@ -459,6 +481,7 @@ class LearnerImpl : public Learner {
     }

     if (header[0] == '{') {
+      // Dispatch to JSON
       auto json_stream = common::FixedSizeStream(&fp);
       std::string buffer;
       json_stream.Take(&buffer);
@@ -471,25 +494,10 @@ class LearnerImpl : public Learner {
     // read parameter
     CHECK_EQ(fi->Read(&mparam_, sizeof(mparam_)), sizeof(mparam_))
         << "BoostLearner: wrong model format";
-    {
-      // backward compatibility code for compatible with old model type
-      // for new model, Read(&name_obj_) is suffice
-      uint64_t len;
-      CHECK_EQ(fi->Read(&len, sizeof(len)), sizeof(len));
-      if (len >= std::numeric_limits<unsigned>::max()) {
-        int gap;
-        CHECK_EQ(fi->Read(&gap, sizeof(gap)), sizeof(gap))
-            << "BoostLearner: wrong model format";
-        len = len >> static_cast<uint64_t>(32UL);
-      }
-      if (len != 0) {
-        tparam_.objective.resize(len);
-        CHECK_EQ(fi->Read(&tparam_.objective[0], len), len)
-            << "BoostLearner: wrong model format";
-      }
-    }
+    CHECK(fi->Read(&tparam_.objective)) << "BoostLearner: wrong model format";
     CHECK(fi->Read(&tparam_.booster)) << "BoostLearner: wrong model format";
-    // duplicated code with LazyInitModel
     obj_.reset(ObjFunction::Create(tparam_.objective, &generic_parameters_));
     gbm_.reset(GradientBooster::Create(tparam_.booster, &generic_parameters_,
                                        &learner_model_param_, cache_));
@@ -508,34 +516,57 @@ class LearnerImpl : public Learner {
       }
       attributes_ = std::map<std::string, std::string>(attr.begin(), attr.end());
     }
-    if (tparam_.objective == "count:poisson") {
-      std::string max_delta_step;
-      fi->Read(&max_delta_step);
-      cfg_["max_delta_step"] = max_delta_step;
+    bool warn_old_model { false };
+    if (attributes_.find("count_poisson_max_delta_step") != attributes_.cend()) {
+      // Loading model from < 1.0.0, objective is not saved.
+      cfg_["max_delta_step"] = attributes_.at("count_poisson_max_delta_step");
+      attributes_.erase("count_poisson_max_delta_step");
+      warn_old_model = true;
+    } else {
+      warn_old_model = false;
     }
-    if (mparam_.contain_eval_metrics != 0) {
-      std::vector<std::string> metr;
-      fi->Read(&metr);
-      for (auto name : metr) {
-        metrics_.emplace_back(Metric::Create(name, &generic_parameters_));
+
+    if (mparam_.major_version >= 1) {
+      learner_model_param_ = LearnerModelParam(mparam_,
+                                               obj_->ProbToMargin(mparam_.base_score));
+    } else {
+      // Before 1.0.0, base_score is saved as a transformed value, and there's no version
+      // attribute in the saved model.
+      learner_model_param_ = LearnerModelParam(mparam_, mparam_.base_score);
+      warn_old_model = true;
+    }
+    if (attributes_.find("objective") != attributes_.cend()) {
+      auto obj_str = attributes_.at("objective");
+      auto j_obj = Json::Load({obj_str.c_str(), obj_str.size()});
+      obj_->LoadConfig(j_obj);
+      attributes_.erase("objective");
+    } else {
+      warn_old_model = true;
+    }
+    if (attributes_.find("metrics") != attributes_.cend()) {
+      auto metrics_str = attributes_.at("metrics");
+      std::vector<std::string> names { common::Split(metrics_str, ';') };
+      attributes_.erase("metrics");
+      for (auto const& n : names) {
+        this->SetParam(kEvalMetric, n);
       }
     }

+    if (warn_old_model) {
+      LOG(WARNING) << "Loading model from XGBoost < 1.0.0, consider saving it "
+                      "again for improved compatibility";
+    }
+
+    // Renew the version.
+    mparam_.major_version = std::get<0>(Version::Self());
+    mparam_.minor_version = std::get<1>(Version::Self());
+
     cfg_["num_class"] = common::ToString(mparam_.num_class);
     cfg_["num_feature"] = common::ToString(mparam_.num_feature);

     auto n = tparam_.__DICT__();
     cfg_.insert(n.cbegin(), n.cend());

-    Args args = {cfg_.cbegin(), cfg_.cend()};
-    generic_parameters_.UpdateAllowUnknown(args);
-    gbm_->Configure(args);
-    obj_->Configure({cfg_.begin(), cfg_.end()});
-
-    for (auto& p_metric : metrics_) {
-      p_metric->Configure({cfg_.begin(), cfg_.end()});
-    }
-
     // copy dsplit from config since it will not run again during restore
     if (tparam_.dsplit == DataSplitMode::kAuto && rabit::IsDistributed()) {
       tparam_.dsplit = DataSplitMode::kRow;
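In plain terms, the binary loader now keys on the stored major_version to decide whether base_score still needs the objective's transformation. A minimal sketch of that decision (my paraphrase, not XGBoost's API):

    def restore_base_margin(major_version, base_score, prob_to_margin):
        """Sketch of the branch above: restoring a binary model's base_score."""
        if major_version >= 1:
            # >= 1.0.0 binaries store the user-supplied value; transform it now.
            return prob_to_margin(base_score)
        # Pre-1.0.0 binaries already stored the transformed value; use it as-is.
        return base_score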
@@ -552,15 +583,8 @@ class LearnerImpl : public Learner {
   void SaveModel(dmlc::Stream* fo) const override {
     LearnerModelParamLegacy mparam = mparam_;  // make a copy to potentially modify
     std::vector<std::pair<std::string, std::string> > extra_attr;
-    // extra attributed to be added just before saving
-    if (tparam_.objective == "count:poisson") {
-      auto it = cfg_.find("max_delta_step");
-      if (it != cfg_.end()) {
-        // write `max_delta_step` parameter as extra attribute of booster
-        mparam.contain_extra_attrs = 1;
-        extra_attr.emplace_back("count_poisson_max_delta_step", it->second);
-      }
-    }
+    mparam.contain_extra_attrs = 1;
     {
       std::vector<std::string> saved_params;
       // check if rabit_bootstrap_cache were set to non zero before adding to checkpoint
@@ -577,6 +601,24 @@ class LearnerImpl : public Learner {
         }
       }
     }
+    {
+      // Similar to JSON model IO, we save the objective.
+      Json j_obj { Object() };
+      obj_->SaveConfig(&j_obj);
+      std::string obj_doc;
+      Json::Dump(j_obj, &obj_doc);
+      extra_attr.emplace_back("objective", obj_doc);
+    }
+    // As of 1.0.0, JVM Package and R Package uses Save/Load model for serialization.
+    // Remove this part once they are ported to use actual serialization methods.
+    if (mparam.contain_eval_metrics != 0) {
+      std::stringstream os;
+      for (auto& ev : metrics_) {
+        os << ev->Name() << ";";
+      }
+      extra_attr.emplace_back("metrics", os.str());
+    }
+
     fo->Write(&mparam, sizeof(LearnerModelParamLegacy));
     fo->Write(tparam_.objective);
     fo->Write(tparam_.booster);
@@ -587,26 +629,7 @@ class LearnerImpl : public Learner {
         attr[kv.first] = kv.second;
       }
       fo->Write(std::vector<std::pair<std::string, std::string>>(
                   attr.begin(), attr.end()));
     }
-    if (tparam_.objective == "count:poisson") {
-      auto it = cfg_.find("max_delta_step");
-      if (it != cfg_.end()) {
-        fo->Write(it->second);
-      } else {
-        // recover value of max_delta_step from extra attributes
-        auto it2 = attributes_.find("count_poisson_max_delta_step");
-        const std::string max_delta_step
-            = (it2 != attributes_.end()) ? it2->second : kMaxDeltaStepDefaultValue;
-        fo->Write(max_delta_step);
-      }
-    }
-    if (mparam.contain_eval_metrics != 0) {
-      std::vector<std::string> metr;
-      for (auto& ev : metrics_) {
-        metr.emplace_back(ev->Name());
-      }
-      fo->Write(metr);
-    }
   }

@@ -661,11 +684,13 @@ class LearnerImpl : public Learner {

  If you are loading a serialized model (like pickle in Python) generated by older
  XGBoost, please export the model by calling `Booster.save_model` from that version
-  first, then load it back in current version.  See:
+  first, then load it back in current version.  There's a simple script for helping
+  the process.  See:

    https://xgboost.readthedocs.io/en/latest/tutorials/saving_model.html

-  for more details about differences between saving model and serializing.
+  for reference to the script, and more details about differences between saving model and
+  serializing.

 )doc";
     int64_t sz {-1};
@@ -854,7 +879,8 @@ class LearnerImpl : public Learner {

   void ConfigureObjective(LearnerTrainParam const& old, Args* p_args) {
     // Once binary IO is gone, NONE of these config is useful.
-    if (cfg_.find("num_class") != cfg_.cend() && cfg_.at("num_class") != "0") {
+    if (cfg_.find("num_class") != cfg_.cend() && cfg_.at("num_class") != "0" &&
+        tparam_.objective != "multi:softprob") {
       cfg_["num_output_group"] = cfg_["num_class"];
       if (atoi(cfg_["num_class"].c_str()) > 1 && cfg_.count("objective") == 0) {
         tparam_.objective = "multi:softmax";
@@ -919,7 +945,6 @@ class LearnerImpl : public Learner {
     }
     CHECK_NE(mparam_.num_feature, 0)
         << "0 feature is supplied.  Are you using raw Booster interface?";
-    learner_model_param_.num_feature = mparam_.num_feature;
     // Remove these once binary IO is gone.
     cfg_["num_feature"] = common::ToString(mparam_.num_feature);
     cfg_["num_class"] = common::ToString(mparam_.num_class);
@@ -21,8 +21,9 @@ ENV GOSU_VERSION 1.10

# Install Python packages
 RUN \
-    pip install pyyaml cpplint pylint astroid sphinx numpy scipy pandas matplotlib sh recommonmark guzzle_sphinx_theme mock \
-                breathe matplotlib graphviz pytest scikit-learn wheel kubernetes urllib3 jsonschema && \
+    pip install pyyaml cpplint pylint astroid sphinx numpy scipy pandas matplotlib sh \
+                recommonmark guzzle_sphinx_theme mock breathe matplotlib graphviz \
+                pytest scikit-learn wheel kubernetes urllib3 jsonschema boto3 && \
     pip install https://h2o-release.s3.amazonaws.com/datatable/stable/datatable-0.7.0/datatable-0.7.0-cp37-cp37m-linux_x86_64.whl && \
     pip install "dask[complete]"

@@ -183,6 +183,41 @@ TEST(Learner, JsonModelIO) {
   delete pp_dmat;
 }

+TEST(Learner, BinaryModelIO) {
+  size_t constexpr kRows = 8;
+  int32_t constexpr kIters = 4;
+  auto pp_dmat = CreateDMatrix(kRows, 10, 0);
+  std::shared_ptr<DMatrix> p_dmat {*pp_dmat};
+  p_dmat->Info().labels_.Resize(kRows);
+
+  std::unique_ptr<Learner> learner{Learner::Create({p_dmat})};
+  learner->SetParam("eval_metric", "rmsle");
+  learner->Configure();
+  for (int32_t iter = 0; iter < kIters; ++iter) {
+    learner->UpdateOneIter(iter, p_dmat);
+  }
+  dmlc::TemporaryDirectory tempdir;
+  std::string const fname = tempdir.path + "binary_model_io.bin";
+  {
+    // Make sure the write is complete before loading.
+    std::unique_ptr<dmlc::Stream> fo(dmlc::Stream::Create(fname.c_str(), "w"));
+    learner->SaveModel(fo.get());
+  }
+
+  learner.reset(Learner::Create({p_dmat}));
+  std::unique_ptr<dmlc::Stream> fi(dmlc::Stream::Create(fname.c_str(), "r"));
+  learner->LoadModel(fi.get());
+  learner->Configure();
+  Json config { Object() };
+  learner->SaveConfig(&config);
+  std::string config_str;
+  Json::Dump(config, &config_str);
+  ASSERT_NE(config_str.find("rmsle"), std::string::npos);
+  ASSERT_EQ(config_str.find("WARNING"), std::string::npos);
+
+  delete pp_dmat;
+}
+
 #if defined(XGBOOST_USE_CUDA)
 // Tests for automatic GPU configuration.
 TEST(Learner, GPUConfiguration) {
tests/python/generate_models.py (new file, 148 lines)
@@ -0,0 +1,148 @@
import xgboost
import numpy as np
import os

kRounds = 2
kRows = 1000
kCols = 4
kForests = 2
kMaxDepth = 2
kClasses = 3

X = np.random.randn(kRows, kCols)
w = np.random.uniform(size=kRows)

version = xgboost.__version__

np.random.seed(1994)
target_dir = 'models'


def booster_bin(model):
    return os.path.join(target_dir,
                        'xgboost-' + version + '.' + model + '.bin')


def booster_json(model):
    return os.path.join(target_dir,
                        'xgboost-' + version + '.' + model + '.json')


def skl_bin(model):
    return os.path.join(target_dir,
                        'xgboost_scikit-' + version + '.' + model + '.bin')


def skl_json(model):
    return os.path.join(target_dir,
                        'xgboost_scikit-' + version + '.' + model + '.json')


def generate_regression_model():
    print('Regression')
    y = np.random.randn(kRows)

    data = xgboost.DMatrix(X, label=y, weight=w)
    booster = xgboost.train({'tree_method': 'hist',
                             'num_parallel_tree': kForests,
                             'max_depth': kMaxDepth},
                            num_boost_round=kRounds, dtrain=data)
    booster.save_model(booster_bin('reg'))
    booster.save_model(booster_json('reg'))

    reg = xgboost.XGBRegressor(tree_method='hist',
                               num_parallel_tree=kForests,
                               max_depth=kMaxDepth,
                               n_estimators=kRounds)
    reg.fit(X, y, w)
    reg.save_model(skl_bin('reg'))
    reg.save_model(skl_json('reg'))


def generate_logistic_model():
    print('Logistic')
    y = np.random.randint(0, 2, size=kRows)
    assert y.max() == 1 and y.min() == 0

    data = xgboost.DMatrix(X, label=y, weight=w)
    booster = xgboost.train({'tree_method': 'hist',
                             'num_parallel_tree': kForests,
                             'max_depth': kMaxDepth,
                             'objective': 'binary:logistic'},
                            num_boost_round=kRounds, dtrain=data)
    booster.save_model(booster_bin('logit'))
    booster.save_model(booster_json('logit'))

    reg = xgboost.XGBClassifier(tree_method='hist',
                                num_parallel_tree=kForests,
                                max_depth=kMaxDepth,
                                n_estimators=kRounds)
    reg.fit(X, y, w)
    reg.save_model(skl_bin('logit'))
    reg.save_model(skl_json('logit'))


def generate_classification_model():
    print('Classification')
    y = np.random.randint(0, kClasses, size=kRows)
    data = xgboost.DMatrix(X, label=y, weight=w)
    booster = xgboost.train({'num_class': kClasses,
                             'tree_method': 'hist',
                             'num_parallel_tree': kForests,
                             'max_depth': kMaxDepth},
                            num_boost_round=kRounds, dtrain=data)
    booster.save_model(booster_bin('cls'))
    booster.save_model(booster_json('cls'))

    cls = xgboost.XGBClassifier(tree_method='hist',
                                num_parallel_tree=kForests,
                                max_depth=kMaxDepth,
                                n_estimators=kRounds)
    cls.fit(X, y, w)
    cls.save_model(skl_bin('cls'))
    cls.save_model(skl_json('cls'))


def generate_ranking_model():
    print('Learning to Rank')
    y = np.random.randint(5, size=kRows)
    w = np.random.uniform(size=20)
    g = np.repeat(50, 20)

    data = xgboost.DMatrix(X, y, weight=w)
    data.set_group(g)
    booster = xgboost.train({'objective': 'rank:ndcg',
                             'num_parallel_tree': kForests,
                             'tree_method': 'hist',
                             'max_depth': kMaxDepth},
                            num_boost_round=kRounds,
                            dtrain=data)
    booster.save_model(booster_bin('ltr'))
    booster.save_model(booster_json('ltr'))

    ranker = xgboost.sklearn.XGBRanker(n_estimators=kRounds,
                                       tree_method='hist',
                                       objective='rank:ndcg',
                                       max_depth=kMaxDepth,
                                       num_parallel_tree=kForests)
    ranker.fit(X, y, g, sample_weight=w)
    ranker.save_model(skl_bin('ltr'))
    ranker.save_model(skl_json('ltr'))


def write_versions():
    versions = {'numpy': np.__version__,
                'xgboost': version}
    with open(os.path.join(target_dir, 'version'), 'w') as fd:
        fd.write(str(versions))


if __name__ == '__main__':
    if not os.path.exists(target_dir):
        os.mkdir(target_dir)

    generate_regression_model()
    generate_logistic_model()
    generate_classification_model()
    generate_ranking_model()
    write_versions()
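The naming helpers above make the produced layout predictable. A small illustration, assuming the file is importable as generate_models (the compatibility test below relies on exactly that); printed values are illustrative for version '1.0.0':

    import generate_models as gm

    print(gm.booster_bin('reg'))  # e.g. models/xgboost-1.0.0.reg.bin
    print(gm.skl_json('ltr'))     # e.g. models/xgboost_scikit-1.0.0.ltr.json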
@@ -39,7 +39,7 @@ class TestBasic(unittest.TestCase):
     def test_basic(self):
         dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train')
         dtest = xgb.DMatrix(dpath + 'agaricus.txt.test')
-        param = {'max_depth': 2, 'eta': 1, 'verbosity': 0,
+        param = {'max_depth': 2, 'eta': 1,
                  'objective': 'binary:logistic'}
         # specify validations set to watch performance
         watchlist = [(dtest, 'eval'), (dtrain, 'train')]
@@ -284,16 +284,31 @@ class TestModels(unittest.TestCase):
         self.assertRaises(ValueError, bst.predict, dm1)
         bst.predict(dm2)  # success

+    def test_model_binary_io(self):
+        model_path = 'test_model_binary_io.bin'
+        parameters = {'tree_method': 'hist', 'booster': 'gbtree',
+                      'scale_pos_weight': '0.5'}
+        X = np.random.random((10, 3))
+        y = np.random.random((10,))
+        dtrain = xgb.DMatrix(X, y)
+        bst = xgb.train(parameters, dtrain, num_boost_round=2)
+        bst.save_model(model_path)
+        bst = xgb.Booster(model_file=model_path)
+        os.remove(model_path)
+        config = json.loads(bst.save_config())
+        assert float(config['learner']['objective'][
+            'reg_loss_param']['scale_pos_weight']) == 0.5
+
     def test_model_json_io(self):
-        model_path = './model.json'
+        model_path = 'test_model_json_io.json'
         parameters = {'tree_method': 'hist', 'booster': 'gbtree'}
         j_model = json_model(model_path, parameters)
         assert isinstance(j_model['learner'], dict)

-        bst = xgb.Booster(model_file='./model.json')
+        bst = xgb.Booster(model_file=model_path)

         bst.save_model(fname=model_path)
-        with open('./model.json', 'r') as fd:
+        with open(model_path, 'r') as fd:
             j_model = json.load(fd)
         assert isinstance(j_model['learner'], dict)

@@ -302,7 +317,7 @@ class TestModels(unittest.TestCase):
     @pytest.mark.skipif(**tm.no_json_schema())
     def test_json_schema(self):
         import jsonschema
-        model_path = './model.json'
+        model_path = 'test_json_schema.json'
         path = os.path.dirname(
             os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
         doc = os.path.join(path, 'doc', 'model.schema')
tests/python/test_model_compatibility.py (new file, 130 lines)
@@ -0,0 +1,130 @@
import xgboost
import os
import generate_models as gm
import json
import zipfile
import pytest


def run_model_param_check(config):
    assert config['learner']['learner_model_param']['num_feature'] == str(4)
    assert config['learner']['learner_train_param']['booster'] == 'gbtree'


def run_booster_check(booster, name):
    config = json.loads(booster.save_config())
    run_model_param_check(config)
    if name.find('cls') != -1:
        assert (len(booster.get_dump()) == gm.kForests * gm.kRounds *
                gm.kClasses)
        assert float(
            config['learner']['learner_model_param']['base_score']) == 0.5
        assert config['learner']['learner_train_param'][
            'objective'] == 'multi:softmax'
    elif name.find('logit') != -1:
        assert len(booster.get_dump()) == gm.kForests * gm.kRounds
        assert config['learner']['learner_model_param']['num_class'] == str(0)
        assert config['learner']['learner_train_param'][
            'objective'] == 'binary:logistic'
    elif name.find('ltr') != -1:
        assert config['learner']['learner_train_param'][
            'objective'] == 'rank:ndcg'
    else:
        assert name.find('reg') != -1
        assert len(booster.get_dump()) == gm.kForests * gm.kRounds
        assert float(
            config['learner']['learner_model_param']['base_score']) == 0.5
        assert config['learner']['learner_train_param'][
            'objective'] == 'reg:squarederror'


def run_scikit_model_check(name, path):
    if name.find('reg') != -1:
        reg = xgboost.XGBRegressor()
        reg.load_model(path)
        config = json.loads(reg.get_booster().save_config())
        if name.find('0.90') != -1:
            assert config['learner']['learner_train_param'][
                'objective'] == 'reg:linear'
        else:
            assert config['learner']['learner_train_param'][
                'objective'] == 'reg:squarederror'
        assert (len(reg.get_booster().get_dump()) ==
                gm.kRounds * gm.kForests)
        run_model_param_check(config)
    elif name.find('cls') != -1:
        cls = xgboost.XGBClassifier()
        cls.load_model(path)
        if name.find('0.90') == -1:
            assert len(cls.classes_) == gm.kClasses
            assert len(cls._le.classes_) == gm.kClasses
            assert cls.n_classes_ == gm.kClasses
        assert (len(cls.get_booster().get_dump()) ==
                gm.kRounds * gm.kForests * gm.kClasses), path
        config = json.loads(cls.get_booster().save_config())
        assert config['learner']['learner_train_param'][
            'objective'] == 'multi:softprob', path
        run_model_param_check(config)
    elif name.find('ltr') != -1:
        ltr = xgboost.XGBRanker()
        ltr.load_model(path)
        assert (len(ltr.get_booster().get_dump()) ==
                gm.kRounds * gm.kForests)
        config = json.loads(ltr.get_booster().save_config())
        assert config['learner']['learner_train_param'][
            'objective'] == 'rank:ndcg'
        run_model_param_check(config)
    elif name.find('logit') != -1:
        logit = xgboost.XGBClassifier()
        logit.load_model(path)
        assert (len(logit.get_booster().get_dump()) ==
                gm.kRounds * gm.kForests)
        config = json.loads(logit.get_booster().save_config())
        assert config['learner']['learner_train_param'][
            'objective'] == 'binary:logistic'
    else:
        assert False


@pytest.mark.ci
def test_model_compatibility():
    '''Test model compatibility, can only be run on CI as others don't
    have the credentials.

    '''
    path = os.path.dirname(os.path.abspath(__file__))
    path = os.path.join(path, 'models')
    try:
        import boto3
        import botocore
    except ImportError:
        pytest.skip(
            'Skiping compatibility tests as boto3 is not installed.')

    try:
        s3_bucket = boto3.resource('s3').Bucket('xgboost-ci-jenkins-artifacts')
        zip_path = 'xgboost_model_compatibility_test.zip'
        s3_bucket.download_file(zip_path, zip_path)
    except botocore.exceptions.NoCredentialsError:
        pytest.skip(
            'Skiping compatibility tests as running on non-CI environment.')

    with zipfile.ZipFile(zip_path, 'r') as z:
        z.extractall(path)

    models = [
        os.path.join(root, f) for root, subdir, files in os.walk(path)
        for f in files
        if f != 'version'
    ]
    assert models

    for path in models:
        name = os.path.basename(path)
        if name.startswith('xgboost-'):
            booster = xgboost.Booster(model_file=path)
            run_booster_check(booster, name)
        elif name.startswith('xgboost_scikit'):
            run_scikit_model_check(name, path)
        else:
            assert False
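The dump-length assertions above follow directly from the constants in generate_models.py; spelled out (my arithmetic, using the committed values kRounds = 2, kForests = 2, kClasses = 3):

    kRounds, kForests, kClasses = 2, 2, 3

    # Each boosting round grows kForests parallel trees; multi-class
    # training grows one such forest per class per round.
    assert kRounds * kForests == 4              # reg, logit, ltr boosters
    assert kRounds * kForests * kClasses == 12  # multi-class 'cls' booster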
@@ -115,7 +115,6 @@ class TestRanking(unittest.TestCase):
         # model training parameters
         cls.params = {'objective': 'rank:pairwise',
                       'booster': 'gbtree',
-                      'silent': 0,
                       'eval_metric': ['ndcg']
                       }

@@ -143,7 +142,7 @@ class TestRanking(unittest.TestCase):
         Test cross-validation with a group specified
         """
         cv = xgboost.cv(self.params, self.dtrain, num_boost_round=2500,
                         early_stopping_rounds=10, nfold=10, as_pandas=False)
         assert isinstance(cv, dict)
         self.assertSetEqual(set(cv.keys()), {'test-ndcg-mean', 'train-ndcg-mean', 'test-ndcg-std', 'train-ndcg-std'},
                             "CV results dict key mismatch")
@@ -153,7 +152,8 @@ class TestRanking(unittest.TestCase):
         Test cross-validation with a group specified
         """
         cv = xgboost.cv(self.params, self.dtrain, num_boost_round=2500,
-                        early_stopping_rounds=10, shuffle=False, nfold=10, as_pandas=False)
+                        early_stopping_rounds=10, shuffle=False, nfold=10,
+                        as_pandas=False)
         assert isinstance(cv, dict)
         assert len(cv) == 4
