Disable full JSON serialization for now. (#6248)

* Disable JSON serialization for now.

* Multi-class classification checkpoints the model on every iteration.
With JSON serialization enabled, this brings significant overhead.

Revert: 90355b4f007ae

* Set R tests to use the binary model format.
This commit is contained in:
Jiaming Yuan 2020-10-16 17:59:54 +08:00 committed by GitHub
parent 52452bebb9
commit 6d293020fb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 52 additions and 17 deletions

View File

@ -175,16 +175,16 @@ test_that("cb.reset.parameters works as expected", {
}) })
test_that("cb.save.model works as expected", { test_that("cb.save.model works as expected", {
files <- c('xgboost_01.json', 'xgboost_02.json', 'xgboost.json') files <- c('xgboost_01.bin', 'xgboost_02.bin', 'xgboost.bin')
for (f in files) if (file.exists(f)) file.remove(f) for (f in files) if (file.exists(f)) file.remove(f)
bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, eta = 1, verbose = 0, bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, eta = 1, verbose = 0,
save_period = 1, save_name = "xgboost_%02d.json") save_period = 1, save_name = "xgboost_%02d.bin")
expect_true(file.exists('xgboost_01.json')) expect_true(file.exists('xgboost_01.bin'))
expect_true(file.exists('xgboost_02.json')) expect_true(file.exists('xgboost_02.bin'))
b1 <- xgb.load('xgboost_01.json') b1 <- xgb.load('xgboost_01.bin')
expect_equal(xgb.ntree(b1), 1) expect_equal(xgb.ntree(b1), 1)
b2 <- xgb.load('xgboost_02.json') b2 <- xgb.load('xgboost_02.bin')
expect_equal(xgb.ntree(b2), 2) expect_equal(xgb.ntree(b2), 2)
xgb.config(b2) <- xgb.config(bst) xgb.config(b2) <- xgb.config(bst)
@ -193,9 +193,9 @@ test_that("cb.save.model works as expected", {
# save_period = 0 saves the last iteration's model # save_period = 0 saves the last iteration's model
bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, eta = 1, verbose = 0, bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, eta = 1, verbose = 0,
save_period = 0, save_name = 'xgboost.json') save_period = 0, save_name = 'xgboost.bin')
expect_true(file.exists('xgboost.json')) expect_true(file.exists('xgboost.bin'))
b2 <- xgb.load('xgboost.json') b2 <- xgb.load('xgboost.bin')
xgb.config(b2) <- xgb.config(bst) xgb.config(b2) <- xgb.config(bst)
expect_equal(bst$raw, b2$raw) expect_equal(bst$raw, b2$raw)

View File

@ -27,7 +27,7 @@ struct GenericParameter : public XGBoostParameter<GenericParameter> {
int gpu_id; int gpu_id;
// gpu page size in external memory mode, 0 means using the default. // gpu page size in external memory mode, 0 means using the default.
size_t gpu_page_size; size_t gpu_page_size;
bool enable_experimental_json_serialization {true}; bool enable_experimental_json_serialization {false};
bool validate_parameters {false}; bool validate_parameters {false};
void CheckDeprecated() { void CheckDeprecated() {
@ -68,7 +68,7 @@ struct GenericParameter : public XGBoostParameter<GenericParameter> {
.set_lower_bound(0) .set_lower_bound(0)
.describe("GPU page size when running in external memory mode."); .describe("GPU page size when running in external memory mode.");
DMLC_DECLARE_FIELD(enable_experimental_json_serialization) DMLC_DECLARE_FIELD(enable_experimental_json_serialization)
.set_default(true) .set_default(false)
.describe("Enable using JSON for memory serialization (Python Pickle, " .describe("Enable using JSON for memory serialization (Python Pickle, "
"rabit checkpoints etc.)."); "rabit checkpoints etc.).");
DMLC_DECLARE_FIELD(validate_parameters) DMLC_DECLARE_FIELD(validate_parameters)

View File

@ -224,6 +224,7 @@ TEST_F(SerializationTest, Exact) {
{"nthread", "1"}, {"nthread", "1"},
{"base_score", "3.14195265"}, {"base_score", "3.14195265"},
{"max_depth", "2"}, {"max_depth", "2"},
{"enable_experimental_json_serialization", "1"},
{"tree_method", "exact"}}, {"tree_method", "exact"}},
fmap_, p_dmat_); fmap_, p_dmat_);
@ -233,6 +234,7 @@ TEST_F(SerializationTest, Exact) {
{"base_score", "3.14195265"}, {"base_score", "3.14195265"},
{"max_depth", "2"}, {"max_depth", "2"},
{"num_parallel_tree", "4"}, {"num_parallel_tree", "4"},
{"enable_experimental_json_serialization", "1"},
{"tree_method", "exact"}}, {"tree_method", "exact"}},
fmap_, p_dmat_); fmap_, p_dmat_);
@ -241,6 +243,7 @@ TEST_F(SerializationTest, Exact) {
{"nthread", "1"}, {"nthread", "1"},
{"base_score", "3.14195265"}, {"base_score", "3.14195265"},
{"max_depth", "2"}, {"max_depth", "2"},
{"enable_experimental_json_serialization", "1"},
{"tree_method", "exact"}}, {"tree_method", "exact"}},
fmap_, p_dmat_); fmap_, p_dmat_);
} }
@ -250,6 +253,7 @@ TEST_F(SerializationTest, Approx) {
{"seed", "0"}, {"seed", "0"},
{"nthread", "1"}, {"nthread", "1"},
{"max_depth", "2"}, {"max_depth", "2"},
{"enable_experimental_json_serialization", "1"},
{"tree_method", "approx"}}, {"tree_method", "approx"}},
fmap_, p_dmat_); fmap_, p_dmat_);
@ -258,6 +262,7 @@ TEST_F(SerializationTest, Approx) {
{"nthread", "1"}, {"nthread", "1"},
{"max_depth", "2"}, {"max_depth", "2"},
{"num_parallel_tree", "4"}, {"num_parallel_tree", "4"},
{"enable_experimental_json_serialization", "1"},
{"tree_method", "approx"}}, {"tree_method", "approx"}},
fmap_, p_dmat_); fmap_, p_dmat_);
@ -265,6 +270,7 @@ TEST_F(SerializationTest, Approx) {
{"seed", "0"}, {"seed", "0"},
{"nthread", "1"}, {"nthread", "1"},
{"max_depth", "2"}, {"max_depth", "2"},
{"enable_experimental_json_serialization", "1"},
{"tree_method", "approx"}}, {"tree_method", "approx"}},
fmap_, p_dmat_); fmap_, p_dmat_);
} }
@ -274,6 +280,7 @@ TEST_F(SerializationTest, Hist) {
{"seed", "0"}, {"seed", "0"},
{"nthread", "1"}, {"nthread", "1"},
{"max_depth", "2"}, {"max_depth", "2"},
{"enable_experimental_json_serialization", "1"},
{"tree_method", "hist"}}, {"tree_method", "hist"}},
fmap_, p_dmat_); fmap_, p_dmat_);
@ -282,6 +289,7 @@ TEST_F(SerializationTest, Hist) {
{"nthread", "1"}, {"nthread", "1"},
{"max_depth", "2"}, {"max_depth", "2"},
{"num_parallel_tree", "4"}, {"num_parallel_tree", "4"},
{"enable_experimental_json_serialization", "1"},
{"tree_method", "hist"}}, {"tree_method", "hist"}},
fmap_, p_dmat_); fmap_, p_dmat_);
@ -289,6 +297,7 @@ TEST_F(SerializationTest, Hist) {
{"seed", "0"}, {"seed", "0"},
{"nthread", "1"}, {"nthread", "1"},
{"max_depth", "2"}, {"max_depth", "2"},
{"enable_experimental_json_serialization", "1"},
{"tree_method", "hist"}}, {"tree_method", "hist"}},
fmap_, p_dmat_); fmap_, p_dmat_);
} }
@ -297,6 +306,7 @@ TEST_F(SerializationTest, CPUCoordDescent) {
TestLearnerSerialization({{"booster", "gblinear"}, TestLearnerSerialization({{"booster", "gblinear"},
{"seed", "0"}, {"seed", "0"},
{"nthread", "1"}, {"nthread", "1"},
{"enable_experimental_json_serialization", "1"},
{"updater", "coord_descent"}}, {"updater", "coord_descent"}},
fmap_, p_dmat_); fmap_, p_dmat_);
} }
@ -305,6 +315,7 @@ TEST_F(SerializationTest, CPUCoordDescent) {
TEST_F(SerializationTest, GpuHist) { TEST_F(SerializationTest, GpuHist) {
TestLearnerSerialization({{"booster", "gbtree"}, TestLearnerSerialization({{"booster", "gbtree"},
{"seed", "0"}, {"seed", "0"},
{"enable_experimental_json_serialization", "1"},
{"nthread", "1"}, {"nthread", "1"},
{"max_depth", "2"}, {"max_depth", "2"},
{"tree_method", "gpu_hist"}}, {"tree_method", "gpu_hist"}},
@ -312,6 +323,7 @@ TEST_F(SerializationTest, GpuHist) {
TestLearnerSerialization({{"booster", "gbtree"}, TestLearnerSerialization({{"booster", "gbtree"},
{"seed", "0"}, {"seed", "0"},
{"enable_experimental_json_serialization", "1"},
{"nthread", "1"}, {"nthread", "1"},
{"max_depth", "2"}, {"max_depth", "2"},
{"num_parallel_tree", "4"}, {"num_parallel_tree", "4"},
@ -320,6 +332,7 @@ TEST_F(SerializationTest, GpuHist) {
TestLearnerSerialization({{"booster", "dart"}, TestLearnerSerialization({{"booster", "dart"},
{"seed", "0"}, {"seed", "0"},
{"enable_experimental_json_serialization", "1"},
{"nthread", "1"}, {"nthread", "1"},
{"max_depth", "2"}, {"max_depth", "2"},
{"tree_method", "gpu_hist"}}, {"tree_method", "gpu_hist"}},
@ -377,6 +390,7 @@ TEST_F(SerializationTest, GPUCoordDescent) {
TestLearnerSerialization({{"booster", "gblinear"}, TestLearnerSerialization({{"booster", "gblinear"},
{"seed", "0"}, {"seed", "0"},
{"nthread", "1"}, {"nthread", "1"},
{"enable_experimental_json_serialization", "1"},
{"updater", "gpu_coord_descent"}}, {"updater", "gpu_coord_descent"}},
fmap_, p_dmat_); fmap_, p_dmat_);
} }
@ -411,6 +425,7 @@ TEST_F(LogitSerializationTest, Exact) {
{"seed", "0"}, {"seed", "0"},
{"nthread", "1"}, {"nthread", "1"},
{"max_depth", "2"}, {"max_depth", "2"},
{"enable_experimental_json_serialization", "1"},
{"tree_method", "exact"}}, {"tree_method", "exact"}},
fmap_, p_dmat_); fmap_, p_dmat_);
@ -419,6 +434,7 @@ TEST_F(LogitSerializationTest, Exact) {
{"seed", "0"}, {"seed", "0"},
{"nthread", "1"}, {"nthread", "1"},
{"max_depth", "2"}, {"max_depth", "2"},
{"enable_experimental_json_serialization", "1"},
{"tree_method", "exact"}}, {"tree_method", "exact"}},
fmap_, p_dmat_); fmap_, p_dmat_);
} }
@ -429,6 +445,7 @@ TEST_F(LogitSerializationTest, Approx) {
{"seed", "0"}, {"seed", "0"},
{"nthread", "1"}, {"nthread", "1"},
{"max_depth", "2"}, {"max_depth", "2"},
{"enable_experimental_json_serialization", "1"},
{"tree_method", "approx"}}, {"tree_method", "approx"}},
fmap_, p_dmat_); fmap_, p_dmat_);
@ -437,6 +454,7 @@ TEST_F(LogitSerializationTest, Approx) {
{"seed", "0"}, {"seed", "0"},
{"nthread", "1"}, {"nthread", "1"},
{"max_depth", "2"}, {"max_depth", "2"},
{"enable_experimental_json_serialization", "1"},
{"tree_method", "approx"}}, {"tree_method", "approx"}},
fmap_, p_dmat_); fmap_, p_dmat_);
} }
@ -447,6 +465,7 @@ TEST_F(LogitSerializationTest, Hist) {
{"seed", "0"}, {"seed", "0"},
{"nthread", "1"}, {"nthread", "1"},
{"max_depth", "2"}, {"max_depth", "2"},
{"enable_experimental_json_serialization", "1"},
{"tree_method", "hist"}}, {"tree_method", "hist"}},
fmap_, p_dmat_); fmap_, p_dmat_);
@ -455,6 +474,7 @@ TEST_F(LogitSerializationTest, Hist) {
{"seed", "0"}, {"seed", "0"},
{"nthread", "1"}, {"nthread", "1"},
{"max_depth", "2"}, {"max_depth", "2"},
{"enable_experimental_json_serialization", "1"},
{"tree_method", "hist"}}, {"tree_method", "hist"}},
fmap_, p_dmat_); fmap_, p_dmat_);
} }
@ -463,6 +483,7 @@ TEST_F(LogitSerializationTest, CPUCoordDescent) {
TestLearnerSerialization({{"booster", "gblinear"}, TestLearnerSerialization({{"booster", "gblinear"},
{"seed", "0"}, {"seed", "0"},
{"nthread", "1"}, {"nthread", "1"},
{"enable_experimental_json_serialization", "1"},
{"updater", "coord_descent"}}, {"updater", "coord_descent"}},
fmap_, p_dmat_); fmap_, p_dmat_);
} }
@ -474,12 +495,14 @@ TEST_F(LogitSerializationTest, GpuHist) {
{"seed", "0"}, {"seed", "0"},
{"nthread", "1"}, {"nthread", "1"},
{"max_depth", "2"}, {"max_depth", "2"},
{"enable_experimental_json_serialization", "1"},
{"tree_method", "gpu_hist"}}, {"tree_method", "gpu_hist"}},
fmap_, p_dmat_); fmap_, p_dmat_);
TestLearnerSerialization({{"booster", "gbtree"}, TestLearnerSerialization({{"booster", "gbtree"},
{"objective", "binary:logistic"}, {"objective", "binary:logistic"},
{"seed", "0"}, {"seed", "0"},
{"enable_experimental_json_serialization", "1"},
{"nthread", "1"}, {"nthread", "1"},
{"max_depth", "2"}, {"max_depth", "2"},
{"num_parallel_tree", "4"}, {"num_parallel_tree", "4"},
@ -491,6 +514,7 @@ TEST_F(LogitSerializationTest, GpuHist) {
{"seed", "0"}, {"seed", "0"},
{"nthread", "1"}, {"nthread", "1"},
{"max_depth", "2"}, {"max_depth", "2"},
{"enable_experimental_json_serialization", "1"},
{"tree_method", "gpu_hist"}}, {"tree_method", "gpu_hist"}},
fmap_, p_dmat_); fmap_, p_dmat_);
} }
@ -500,6 +524,7 @@ TEST_F(LogitSerializationTest, GPUCoordDescent) {
{"objective", "binary:logistic"}, {"objective", "binary:logistic"},
{"seed", "0"}, {"seed", "0"},
{"nthread", "1"}, {"nthread", "1"},
{"enable_experimental_json_serialization", "1"},
{"updater", "gpu_coord_descent"}}, {"updater", "gpu_coord_descent"}},
fmap_, p_dmat_); fmap_, p_dmat_);
} }
@ -535,6 +560,7 @@ TEST_F(MultiClassesSerializationTest, Exact) {
{"seed", "0"}, {"seed", "0"},
{"nthread", "1"}, {"nthread", "1"},
{"max_depth", std::to_string(kClasses)}, {"max_depth", std::to_string(kClasses)},
{"enable_experimental_json_serialization", "1"},
{"tree_method", "exact"}}, {"tree_method", "exact"}},
fmap_, p_dmat_); fmap_, p_dmat_);
@ -544,6 +570,7 @@ TEST_F(MultiClassesSerializationTest, Exact) {
{"nthread", "1"}, {"nthread", "1"},
{"max_depth", std::to_string(kClasses)}, {"max_depth", std::to_string(kClasses)},
{"num_parallel_tree", "4"}, {"num_parallel_tree", "4"},
{"enable_experimental_json_serialization", "1"},
{"tree_method", "exact"}}, {"tree_method", "exact"}},
fmap_, p_dmat_); fmap_, p_dmat_);
@ -552,6 +579,7 @@ TEST_F(MultiClassesSerializationTest, Exact) {
{"seed", "0"}, {"seed", "0"},
{"nthread", "1"}, {"nthread", "1"},
{"max_depth", std::to_string(kClasses)}, {"max_depth", std::to_string(kClasses)},
{"enable_experimental_json_serialization", "1"},
{"tree_method", "exact"}}, {"tree_method", "exact"}},
fmap_, p_dmat_); fmap_, p_dmat_);
} }
@ -562,6 +590,7 @@ TEST_F(MultiClassesSerializationTest, Approx) {
{"seed", "0"}, {"seed", "0"},
{"nthread", "1"}, {"nthread", "1"},
{"max_depth", std::to_string(kClasses)}, {"max_depth", std::to_string(kClasses)},
{"enable_experimental_json_serialization", "1"},
{"tree_method", "approx"}}, {"tree_method", "approx"}},
fmap_, p_dmat_); fmap_, p_dmat_);
@ -570,6 +599,7 @@ TEST_F(MultiClassesSerializationTest, Approx) {
{"seed", "0"}, {"seed", "0"},
{"nthread", "1"}, {"nthread", "1"},
{"max_depth", std::to_string(kClasses)}, {"max_depth", std::to_string(kClasses)},
{"enable_experimental_json_serialization", "1"},
{"tree_method", "approx"}}, {"tree_method", "approx"}},
fmap_, p_dmat_); fmap_, p_dmat_);
} }
@ -580,6 +610,7 @@ TEST_F(MultiClassesSerializationTest, Hist) {
{"seed", "0"}, {"seed", "0"},
{"nthread", "1"}, {"nthread", "1"},
{"max_depth", std::to_string(kClasses)}, {"max_depth", std::to_string(kClasses)},
{"enable_experimental_json_serialization", "1"},
{"tree_method", "hist"}}, {"tree_method", "hist"}},
fmap_, p_dmat_); fmap_, p_dmat_);
@ -588,6 +619,7 @@ TEST_F(MultiClassesSerializationTest, Hist) {
{"seed", "0"}, {"seed", "0"},
{"nthread", "1"}, {"nthread", "1"},
{"max_depth", std::to_string(kClasses)}, {"max_depth", std::to_string(kClasses)},
{"enable_experimental_json_serialization", "1"},
{"num_parallel_tree", "4"}, {"num_parallel_tree", "4"},
{"tree_method", "hist"}}, {"tree_method", "hist"}},
fmap_, p_dmat_); fmap_, p_dmat_);
@ -597,6 +629,7 @@ TEST_F(MultiClassesSerializationTest, Hist) {
{"seed", "0"}, {"seed", "0"},
{"nthread", "1"}, {"nthread", "1"},
{"max_depth", std::to_string(kClasses)}, {"max_depth", std::to_string(kClasses)},
{"enable_experimental_json_serialization", "1"},
{"tree_method", "hist"}}, {"tree_method", "hist"}},
fmap_, p_dmat_); fmap_, p_dmat_);
} }
@ -605,6 +638,7 @@ TEST_F(MultiClassesSerializationTest, CPUCoordDescent) {
TestLearnerSerialization({{"booster", "gblinear"}, TestLearnerSerialization({{"booster", "gblinear"},
{"seed", "0"}, {"seed", "0"},
{"nthread", "1"}, {"nthread", "1"},
{"enable_experimental_json_serialization", "1"},
{"updater", "coord_descent"}}, {"updater", "coord_descent"}},
fmap_, p_dmat_); fmap_, p_dmat_);
} }
@ -620,6 +654,7 @@ TEST_F(MultiClassesSerializationTest, GpuHist) {
// different result (1e-7) with CPU predictor for some // different result (1e-7) with CPU predictor for some
// entries. // entries.
{"predictor", "gpu_predictor"}, {"predictor", "gpu_predictor"},
{"enable_experimental_json_serialization", "1"},
{"tree_method", "gpu_hist"}}, {"tree_method", "gpu_hist"}},
fmap_, p_dmat_); fmap_, p_dmat_);
@ -631,6 +666,7 @@ TEST_F(MultiClassesSerializationTest, GpuHist) {
// GPU_Hist has higher floating point error. 1e-6 doesn't work // GPU_Hist has higher floating point error. 1e-6 doesn't work
// after num_parallel_tree goes to 4 // after num_parallel_tree goes to 4
{"num_parallel_tree", "3"}, {"num_parallel_tree", "3"},
{"enable_experimental_json_serialization", "1"},
{"tree_method", "gpu_hist"}}, {"tree_method", "gpu_hist"}},
fmap_, p_dmat_); fmap_, p_dmat_);
@ -639,6 +675,7 @@ TEST_F(MultiClassesSerializationTest, GpuHist) {
{"seed", "0"}, {"seed", "0"},
{"nthread", "1"}, {"nthread", "1"},
{"max_depth", std::to_string(kClasses)}, {"max_depth", std::to_string(kClasses)},
{"enable_experimental_json_serialization", "1"},
{"tree_method", "gpu_hist"}}, {"tree_method", "gpu_hist"}},
fmap_, p_dmat_); fmap_, p_dmat_);
} }
@ -648,6 +685,7 @@ TEST_F(MultiClassesSerializationTest, GPUCoordDescent) {
{"num_class", std::to_string(kClasses)}, {"num_class", std::to_string(kClasses)},
{"seed", "0"}, {"seed", "0"},
{"nthread", "1"}, {"nthread", "1"},
{"enable_experimental_json_serialization", "1"},
{"updater", "gpu_coord_descent"}}, {"updater", "gpu_coord_descent"}},
fmap_, p_dmat_); fmap_, p_dmat_);
} }

View File

@ -47,12 +47,9 @@ eval[test] = {data_path}
seed = 1994 seed = 1994
with tempfile.TemporaryDirectory() as tmpdir: with tempfile.TemporaryDirectory() as tmpdir:
model_out_cli = os.path.join( model_out_cli = os.path.join(tmpdir, 'test_load_cli_model-cli.bin')
tmpdir, 'test_load_cli_model-cli.json') model_out_py = os.path.join(tmpdir, 'test_cli_model-py.bin')
model_out_py = os.path.join( config_path = os.path.join(tmpdir, 'test_load_cli_model.conf')
tmpdir, 'test_cli_model-py.json')
config_path = os.path.join(
tmpdir, 'test_load_cli_model.conf')
train_conf = self.template.format(data_path=data_path, train_conf = self.template.format(data_path=data_path,
seed=seed, seed=seed,