Enable loading model from <1.0.0 trained with objective='binary:logitraw' (#6517)

* Enable loading model from <1.0.0 trained with objective='binary:logitraw'

* Add binary:logitraw in model compatibility testing suite

* Feedback from @trivialfis: Override ProbToMargin() for LogisticRaw

Co-authored-by: Jiaming Yuan <jm.yuan@outlook.com>
Philip Hyunsu Cho, 2020-12-16 16:53:46 -08:00, committed by GitHub
parent bf6cfe3b99
commit ad1a527709
5 changed files with 45 additions and 21 deletions
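Background for the fix: with objective='binary:logitraw' the booster outputs raw margins (scores before the logistic transformation), whereas 'binary:logistic' outputs probabilities. A minimal sketch, not part of this commit, illustrating that relationship; it assumes xgboost and numpy are installed:

# Sketch only: applying the sigmoid to a booster's margin output
# reproduces its probability output.
import numpy as np
import xgboost

rng = np.random.RandomState(0)
X, y = rng.randn(256, 8), rng.randint(0, 2, size=256)
dtrain = xgboost.DMatrix(X, label=y)
bst = xgboost.train({'objective': 'binary:logistic'}, dtrain, num_boost_round=4)

# output_margin=True returns the raw scores that a 'binary:logitraw'
# booster would return from a plain predict() call.
margins = bst.predict(dtrain, output_margin=True)
probs = bst.predict(dtrain)
np.testing.assert_allclose(1.0 / (1.0 + np.exp(-margins)), probs, rtol=1e-5)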

@@ -2,7 +2,6 @@
 # of saved model files from XGBoost version 0.90 and 1.0.x.
 library(xgboost)
 library(Matrix)
-source('./generate_models_params.R')
 set.seed(0)
 
 metadata <- list(
@@ -53,11 +52,16 @@ generate_logistic_model <- function () {
   y <- sample(0:1, size = metadata$kRows, replace = TRUE)
   stopifnot(max(y) == 1, min(y) == 0)
 
-  data <- xgb.DMatrix(X, label = y, weight = w)
-  params <- list(tree_method = 'hist', num_parallel_tree = metadata$kForests,
-                 max_depth = metadata$kMaxDepth, objective = 'binary:logistic')
-  booster <- xgb.train(params, data, nrounds = metadata$kRounds)
-  save_booster(booster, 'logit')
+  objective <- c('binary:logistic', 'binary:logitraw')
+  name <- c('logit', 'logitraw')
+
+  for (i in seq_len(length(objective))) {
+    data <- xgb.DMatrix(X, label = y, weight = w)
+    params <- list(tree_method = 'hist', num_parallel_tree = metadata$kForests,
+                   max_depth = metadata$kMaxDepth, objective = objective[i])
+    booster <- xgb.train(params, data, nrounds = metadata$kRounds)
+    save_booster(booster, name[i])
+  }
 }
 
 generate_classification_model <- function () {

@@ -39,6 +39,10 @@ run_booster_check <- function (booster, name) {
     testthat::expect_equal(config$learner$learner_train_param$objective, 'multi:softmax')
     testthat::expect_equal(as.numeric(config$learner$learner_model_param$num_class),
                            metadata$kClasses)
+  } else if (name == 'logitraw') {
+    testthat::expect_equal(get_num_tree(booster), metadata$kForests * metadata$kRounds)
+    testthat::expect_equal(as.numeric(config$learner$learner_model_param$num_class), 0)
+    testthat::expect_equal(config$learner$learner_train_param$objective, 'binary:logitraw')
   } else if (name == 'logit') {
     testthat::expect_equal(get_num_tree(booster), metadata$kForests * metadata$kRounds)
     testthat::expect_equal(as.numeric(config$learner$learner_model_param$num_class), 0)

@@ -162,6 +162,9 @@ struct LogisticRaw : public LogisticRegression {
     predt = common::Sigmoid(predt);
     return std::max(predt * (T(1.0f) - predt), eps);
   }
+  static bst_float ProbToMargin(bst_float base_score) {
+    return base_score;
+  }
   static const char* DefaultEvalMetric() { return "auc"; }
   static const char* Name() { return "binary:logitraw"; }
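This override is the core of the fix: LogisticRaw previously inherited ProbToMargin() from LogisticRegression, which treats base_score as a probability and converts it to a margin via the inverse sigmoid. For a raw-margin objective, base_score must pass through unchanged. A rough Python mirror of the two behaviours, for illustration only (the authoritative implementations are the C++ ones above):

import math

def prob_to_margin_logistic(base_score):
    # Base class behaviour: base_score is a probability in (0, 1);
    # store its logit (inverse sigmoid) as the initial margin.
    assert 0.0 < base_score < 1.0
    return -math.log(1.0 / base_score - 1.0)

def prob_to_margin_logitraw(base_score):
    # LogisticRaw override: base_score is already a raw margin; keep it as-is.
    return base_score

print(prob_to_margin_logistic(0.5))  # 0.0
print(prob_to_margin_logitraw(0.5))  # 0.5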

@@ -64,22 +64,24 @@ def generate_logistic_model():
     y = np.random.randint(0, 2, size=kRows)
     assert y.max() == 1 and y.min() == 0
 
-    data = xgboost.DMatrix(X, label=y, weight=w)
-    booster = xgboost.train({'tree_method': 'hist',
-                             'num_parallel_tree': kForests,
-                             'max_depth': kMaxDepth,
-                             'objective': 'binary:logistic'},
-                            num_boost_round=kRounds, dtrain=data)
-    booster.save_model(booster_bin('logit'))
-    booster.save_model(booster_json('logit'))
-
-    reg = xgboost.XGBClassifier(tree_method='hist',
-                                num_parallel_tree=kForests,
-                                max_depth=kMaxDepth,
-                                n_estimators=kRounds)
-    reg.fit(X, y, w)
-    reg.save_model(skl_bin('logit'))
-    reg.save_model(skl_json('logit'))
+    for objective, name in [('binary:logistic', 'logit'), ('binary:logitraw', 'logitraw')]:
+        data = xgboost.DMatrix(X, label=y, weight=w)
+        booster = xgboost.train({'tree_method': 'hist',
+                                 'num_parallel_tree': kForests,
+                                 'max_depth': kMaxDepth,
+                                 'objective': objective},
+                                num_boost_round=kRounds, dtrain=data)
+        booster.save_model(booster_bin(name))
+        booster.save_model(booster_json(name))
+
+        reg = xgboost.XGBClassifier(tree_method='hist',
+                                    num_parallel_tree=kForests,
+                                    max_depth=kMaxDepth,
+                                    n_estimators=kRounds,
+                                    objective=objective)
+        reg.fit(X, y, w)
+        reg.save_model(skl_bin(name))
+        reg.save_model(skl_json(name))
 
 
 def generate_classification_model():
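Note the objective=objective argument added to the XGBClassifier constructor above: without it the scikit-learn wrapper defaults to 'binary:logistic', so the logitraw variant would never be exercised. A hedged usage sketch (data and parameter values here are illustrative, not from the test suite):

import numpy as np
import xgboost

rng = np.random.RandomState(0)
X, y = rng.randn(128, 8), rng.randint(0, 2, size=128)

# Passing objective= makes the wrapper train with the raw-margin objective.
clf = xgboost.XGBClassifier(objective='binary:logitraw', n_estimators=4)
clf.fit(X, y)
# For binary:logitraw the underlying booster predicts untransformed margins.
margins = clf.get_booster().predict(xgboost.DMatrix(X))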

@@ -24,6 +24,10 @@ def run_booster_check(booster, name):
             config['learner']['learner_model_param']['base_score']) == 0.5
         assert config['learner']['learner_train_param'][
             'objective'] == 'multi:softmax'
+    elif name.find('logitraw') != -1:
+        assert len(booster.get_dump()) == gm.kForests * gm.kRounds
+        assert config['learner']['learner_model_param']['num_class'] == str(0)
+        assert config['learner']['learner_train_param']['objective'] == 'binary:logitraw'
     elif name.find('logit') != -1:
         assert len(booster.get_dump()) == gm.kForests * gm.kRounds
         assert config['learner']['learner_model_param']['num_class'] == str(0)
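Branch order matters in the hunk above: any model name containing 'logitraw' also contains 'logit' as a substring, so the 'logitraw' branch must precede the 'logit' one or it would be shadowed. A one-line check:

assert 'logitraw'.find('logit') != -1  # the 'logit' test would also match logitraw names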
@@ -77,6 +81,13 @@ def run_scikit_model_check(name, path):
         assert config['learner']['learner_train_param'][
             'objective'] == 'rank:ndcg'
         run_model_param_check(config)
+    elif name.find('logitraw') != -1:
+        logit = xgboost.XGBClassifier()
+        logit.load_model(path)
+        assert (len(logit.get_booster().get_dump()) ==
+                gm.kRounds * gm.kForests)
+        config = json.loads(logit.get_booster().save_config())
+        assert config['learner']['learner_train_param']['objective'] == 'binary:logitraw'
     elif name.find('logit') != -1:
         logit = xgboost.XGBClassifier()
         logit.load_model(path)
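With the ProbToMargin() override in place, a binary:logitraw model saved by XGBoost < 1.0.0 should load into a current Booster and report its objective correctly. A minimal sketch of the kind of round-trip the compatibility suite automates; the model file name is hypothetical:

import json
import xgboost

# Hypothetical path to a model trained with XGBoost < 1.0.0 and binary:logitraw.
booster = xgboost.Booster(model_file='old-logitraw-model.bin')
config = json.loads(booster.save_config())
assert config['learner']['learner_train_param']['objective'] == 'binary:logitraw'
assert config['learner']['learner_model_param']['num_class'] == '0'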