JSON configuration IO. (#5111)
* Add saving/loading JSON configuration. * Implement Python pickle interface with new IO routines. * Basic tests for training continuation.
This commit is contained in:
@@ -8,6 +8,7 @@
|
||||
#include "../helpers.h"
|
||||
#include "../../../src/common/io.h"
|
||||
|
||||
|
||||
TEST(c_api, XGDMatrixCreateFromMatDT) {
|
||||
std::vector<int> col0 = {0, -1, 3};
|
||||
std::vector<float> col1 = {-4.0f, 2.0f, 0.0f};
|
||||
@@ -77,7 +78,41 @@ TEST(c_api, Version) {
|
||||
ASSERT_EQ(patch, XGBOOST_VER_PATCH);
|
||||
}
|
||||
|
||||
TEST(c_api, Json_ModelIO){
|
||||
// Round-trips a learner's JSON configuration through the C API:
//   save -> load the saved config back -> save again,
// and requires both dumps to parse to equal Json values.
TEST(c_api, ConfigIO) {
  size_t constexpr kRows = 10;
  auto pp_dmat = CreateDMatrix(kRows, 10, 0);
  // `auto` copies the pointee, so p_dmat is an independent handle.  Release
  // the heap-allocated holder right away so that an early ASSERT exit below
  // cannot leak it.
  auto p_dmat = *pp_dmat;
  delete pp_dmat;

  std::vector<std::shared_ptr<DMatrix>> mat {p_dmat};
  std::vector<bst_float> labels(kRows);
  for (size_t i = 0; i < labels.size(); ++i) {
    labels[i] = static_cast<bst_float>(i);
  }
  p_dmat->Info().labels_.HostVector() = labels;

  std::shared_ptr<Learner> learner { Learner::Create(mat) };

  BoosterHandle handle = learner.get();
  learner->UpdateOneIter(0, p_dmat.get());

  // First dump.  Copy it into a string we own before touching the booster
  // again: `out[0]` is a pointer handed out by the C API, not owned by us.
  char const* out[1] {nullptr};
  bst_ulong len {0};
  ASSERT_EQ(XGBoosterSaveJsonConfig(handle, &len, out), 0) << XGBGetLastError();
  ASSERT_NE(out[0], nullptr);
  std::string config_str_0 { out[0] };
  auto config_0 = Json::Load({config_str_0.c_str(), config_str_0.size()});

  // Feed our own copy back in rather than the API-provided pointer.
  ASSERT_EQ(XGBoosterLoadJsonConfig(handle, config_str_0.c_str()), 0)
      << XGBGetLastError();

  // Second dump.  The string must be captured AFTER the save call; capturing
  // it before would just re-read the first dump and make the comparison
  // trivially true.
  bst_ulong len_1 {0};
  ASSERT_EQ(XGBoosterSaveJsonConfig(handle, &len_1, out), 0) << XGBGetLastError();
  ASSERT_NE(out[0], nullptr);
  std::string config_str_1 { out[0] };
  auto config_1 = Json::Load({config_str_1.c_str(), config_str_1.size()});

  ASSERT_EQ(config_0, config_1);
}
|
||||
|
||||
TEST(c_api, Json_ModelIO) {
|
||||
size_t constexpr kRows = 10;
|
||||
dmlc::TemporaryDirectory tempdir;
|
||||
|
||||
|
||||
@@ -117,15 +117,28 @@ TEST(GBTree, Json_IO) {
|
||||
CreateTrainedGBM("gbtree", Args{}, kRows, kCols, &mparam, &gparam) };
|
||||
|
||||
Json model {Object()};
|
||||
model["model"] = Object();
|
||||
auto& j_model = model["model"];
|
||||
|
||||
gbm->SaveModel(&model);
|
||||
model["config"] = Object();
|
||||
auto& j_param = model["config"];
|
||||
|
||||
gbm->SaveModel(&j_model);
|
||||
gbm->SaveConfig(&j_param);
|
||||
|
||||
std::string model_str;
|
||||
Json::Dump(model, &model_str);
|
||||
|
||||
auto loaded_model = Json::Load(StringView{model_str.c_str(), model_str.size()});
|
||||
ASSERT_EQ(get<String>(loaded_model["name"]), "gbtree");
|
||||
ASSERT_TRUE(IsA<Object>(loaded_model["model"]["gbtree_model_param"]));
|
||||
model = Json::Load({model_str.c_str(), model_str.size()});
|
||||
ASSERT_EQ(get<String>(model["model"]["name"]), "gbtree");
|
||||
|
||||
auto const& gbtree_model = model["model"]["model"];
|
||||
ASSERT_EQ(get<Array>(gbtree_model["trees"]).size(), 1);
|
||||
ASSERT_EQ(get<Integer>(get<Object>(get<Array>(gbtree_model["trees"]).front()).at("id")), 0);
|
||||
ASSERT_EQ(get<Array>(gbtree_model["tree_info"]).size(), 1);
|
||||
|
||||
auto j_train_param = model["config"]["gbtree_train_param"];
|
||||
ASSERT_EQ(get<String>(j_train_param["num_parallel_tree"]), "1");
|
||||
}
|
||||
|
||||
TEST(Dart, Json_IO) {
|
||||
@@ -145,20 +158,21 @@ TEST(Dart, Json_IO) {
|
||||
Json model {Object()};
|
||||
model["model"] = Object();
|
||||
auto& j_model = model["model"];
|
||||
model["parameters"] = Object();
|
||||
model["config"] = Object();
|
||||
|
||||
auto& j_param = model["config"];
|
||||
|
||||
gbm->SaveModel(&j_model);
|
||||
gbm->SaveConfig(&j_param);
|
||||
|
||||
std::string model_str;
|
||||
Json::Dump(model, &model_str);
|
||||
|
||||
model = Json::Load({model_str.c_str(), model_str.size()});
|
||||
|
||||
{
|
||||
auto const& gbtree = model["model"]["gbtree"];
|
||||
ASSERT_TRUE(IsA<Object>(gbtree));
|
||||
ASSERT_EQ(get<String>(model["model"]["name"]), "dart");
|
||||
ASSERT_NE(get<Array>(model["model"]["weight_drop"]).size(), 0);
|
||||
}
|
||||
ASSERT_EQ(get<String>(model["model"]["name"]), "dart") << model;
|
||||
ASSERT_EQ(get<String>(model["config"]["name"]), "dart");
|
||||
ASSERT_TRUE(IsA<Object>(model["model"]["gbtree"]));
|
||||
ASSERT_NE(get<Array>(model["model"]["weight_drop"]).size(), 0);
|
||||
}
|
||||
} // namespace xgboost
|
||||
|
||||
@@ -13,23 +13,6 @@
|
||||
#include "../helpers.h"
|
||||
#include "../../../src/gbm/gbtree_model.h"
|
||||
|
||||
namespace {
|
||||
|
||||
/*!
 * \brief Assert that a C API call succeeded.
 *
 * Raises a fatal gtest failure, annotated with XGBGetLastError(), when `ret`
 * is not 0.
 *
 * NOTE(review): per the GoogleTest documentation a fatal assertion inside a
 * helper only returns from the helper itself; the calling test body keeps
 * running unless it checks for the failure.
 *
 * \param ret Return code of the C API call under test.
 */
inline void CheckCAPICall(int ret) {
  ASSERT_EQ(ret, 0) << XGBGetLastError();
}
|
||||
|
||||
} // namespace anonymous
|
||||
|
||||
const std::map<std::string, std::string>&
|
||||
QueryBoosterConfigurationArguments(BoosterHandle handle) {
|
||||
CHECK_NE(handle, static_cast<void*>(nullptr));
|
||||
auto* bst = static_cast<xgboost::Learner*>(handle);
|
||||
bst->Configure();
|
||||
return bst->GetConfigurationArguments();
|
||||
}
|
||||
|
||||
|
||||
namespace xgboost {
|
||||
namespace predictor {
|
||||
|
||||
@@ -110,77 +93,5 @@ TEST(gpu_predictor, ExternalMemoryTest) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Test whether pickling preserves predictor parameters
|
||||
TEST(gpu_predictor, PicklingTest) {
|
||||
int const gpuid = 0;
|
||||
|
||||
dmlc::TemporaryDirectory tempdir;
|
||||
const std::string tmp_file = tempdir.path + "/simple.libsvm";
|
||||
CreateBigTestData(tmp_file, 600);
|
||||
|
||||
DMatrixHandle dmat[1];
|
||||
BoosterHandle bst, bst2;
|
||||
std::vector<bst_float> label;
|
||||
for (int i = 0; i < 200; ++i) {
|
||||
label.push_back((i % 2 ? 1 : 0));
|
||||
}
|
||||
|
||||
// Load data matrix
|
||||
ASSERT_EQ(XGDMatrixCreateFromFile(
|
||||
tmp_file.c_str(), 0, &dmat[0]), 0) << XGBGetLastError();
|
||||
ASSERT_EQ(XGDMatrixSetFloatInfo(
|
||||
dmat[0], "label", label.data(), 200), 0) << XGBGetLastError();
|
||||
// Create booster
|
||||
ASSERT_EQ(XGBoosterCreate(dmat, 1, &bst), 0) << XGBGetLastError();
|
||||
// Set parameters
|
||||
ASSERT_EQ(XGBoosterSetParam(bst, "seed", "0"), 0) << XGBGetLastError();
|
||||
ASSERT_EQ(XGBoosterSetParam(bst, "base_score", "0.5"), 0) << XGBGetLastError();
|
||||
ASSERT_EQ(XGBoosterSetParam(bst, "booster", "gbtree"), 0) << XGBGetLastError();
|
||||
ASSERT_EQ(XGBoosterSetParam(bst, "learning_rate", "0.01"), 0) << XGBGetLastError();
|
||||
ASSERT_EQ(XGBoosterSetParam(bst, "max_depth", "8"), 0) << XGBGetLastError();
|
||||
ASSERT_EQ(XGBoosterSetParam(
|
||||
bst, "objective", "binary:logistic"), 0) << XGBGetLastError();
|
||||
ASSERT_EQ(XGBoosterSetParam(bst, "seed", "123"), 0) << XGBGetLastError();
|
||||
ASSERT_EQ(XGBoosterSetParam(
|
||||
bst, "tree_method", "gpu_hist"), 0) << XGBGetLastError();
|
||||
ASSERT_EQ(XGBoosterSetParam(
|
||||
bst, "gpu_id", std::to_string(gpuid).c_str()), 0) << XGBGetLastError();
|
||||
ASSERT_EQ(XGBoosterSetParam(bst, "predictor", "gpu_predictor"), 0) << XGBGetLastError();
|
||||
|
||||
// Run boosting iterations
|
||||
for (int i = 0; i < 10; ++i) {
|
||||
ASSERT_EQ(XGBoosterUpdateOneIter(bst, i, dmat[0]), 0) << XGBGetLastError();
|
||||
}
|
||||
|
||||
// Delete matrix
|
||||
CheckCAPICall(XGDMatrixFree(dmat[0]));
|
||||
|
||||
// Pickle
|
||||
const char* dptr;
|
||||
bst_ulong len;
|
||||
std::string buf;
|
||||
CheckCAPICall(XGBoosterGetModelRaw(bst, &len, &dptr));
|
||||
buf = std::string(dptr, len);
|
||||
CheckCAPICall(XGBoosterFree(bst));
|
||||
|
||||
// Unpickle
|
||||
CheckCAPICall(XGBoosterCreate(nullptr, 0, &bst2));
|
||||
CheckCAPICall(XGBoosterLoadModelFromBuffer(bst2, buf.c_str(), len));
|
||||
|
||||
{ // Query predictor
|
||||
const auto& kwargs = QueryBoosterConfigurationArguments(bst2);
|
||||
ASSERT_EQ(kwargs.at("predictor"), "gpu_predictor");
|
||||
ASSERT_EQ(kwargs.at("gpu_id"), std::to_string(gpuid).c_str());
|
||||
}
|
||||
|
||||
{ // Change predictor and query again
|
||||
CheckCAPICall(XGBoosterSetParam(bst2, "predictor", "cpu_predictor"));
|
||||
const auto& kwargs = QueryBoosterConfigurationArguments(bst2);
|
||||
ASSERT_EQ(kwargs.at("predictor"), "cpu_predictor");
|
||||
}
|
||||
|
||||
CheckCAPICall(XGBoosterFree(bst2));
|
||||
}
|
||||
} // namespace predictor
|
||||
} // namespace xgboost
|
||||
|
||||
Reference in New Issue
Block a user