[Breaking] Change default evaluation metric for classification to logloss / mlogloss (#6183)

* Change DefaultEvalMetric of classification from error to logloss
* Change default binary metric in plugin/example/custom_obj.cc
* Set old error metric in python tests
* Set old error metric in R tests
* Fix missed eval metrics and typos in R tests
* Fix setting eval_metric twice in R tests
* Add warning for empty eval_metric for classification
* Fix Dask tests

Co-authored-by: Hyunsu Cho <chohyu01@cs.washington.edu>
2020-10-02 21:06:47 +02:00
parent e0e4f15d0e
commit cf4f019ed6
18 changed files with 56 additions and 32 deletions
--- a/R-package/tests/testthat/test_basic.R
+++ b/R-package/tests/testthat/test_basic.R
@@ -17,7 +17,8 @@ test_that("train and predict binary classification", {
  nrounds <- 2
  expect_output(
    bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
-                  eta = 1, nthread = 2, nrounds = nrounds, objective = "binary:logistic")
+                  eta = 1, nthread = 2, nrounds = nrounds, objective = "binary:logistic",
+                  eval_metric = "error")
  , "train-error")
  expect_equal(class(bst), "xgb.Booster")
  expect_equal(bst$niter, nrounds)
@@ -122,7 +123,7 @@ test_that("train and predict softprob", {
  expect_output(
    bst <- xgboost(data = as.matrix(iris[, -5]), label = lb,
                   max_depth = 3, eta = 0.5, nthread = 2, nrounds = 5,
-                   objective = "multi:softprob", num_class = 3)
+                   objective = "multi:softprob", num_class = 3, eval_metric = "merror")
  , "train-merror")
  expect_false(is.null(bst$evaluation_log))
  expect_lt(bst$evaluation_log[, min(train_merror)], 0.025)
@@ -150,7 +151,7 @@ test_that("train and predict softmax", {
  expect_output(
    bst <- xgboost(data = as.matrix(iris[, -5]), label = lb,
                   max_depth = 3, eta = 0.5, nthread = 2, nrounds = 5,
-                   objective = "multi:softmax", num_class = 3)
+                   objective = "multi:softmax", num_class = 3, eval_metric = "merror")
  , "train-merror")
  expect_false(is.null(bst$evaluation_log))
  expect_lt(bst$evaluation_log[, min(train_merror)], 0.025)
@@ -167,7 +168,7 @@ test_that("train and predict RF", {
  lb <- train$label
  # single iteration
  bst <- xgboost(data = train$data, label = lb, max_depth = 5,
-                 nthread = 2, nrounds = 1, objective = "binary:logistic",
+                 nthread = 2, nrounds = 1, objective = "binary:logistic", eval_metric = "error",
                 num_parallel_tree = 20, subsample = 0.6, colsample_bytree = 0.1)
  expect_equal(bst$niter, 1)
  expect_equal(xgb.ntree(bst), 20)
@@ -193,7 +194,8 @@ test_that("train and predict RF with softprob", {
  set.seed(11)
  bst <- xgboost(data = as.matrix(iris[, -5]), label = lb,
                 max_depth = 3, eta = 0.9, nthread = 2, nrounds = nrounds,
-                 objective = "multi:softprob", num_class = 3, verbose = 0,
+                 objective = "multi:softprob", eval_metric = "merror",
+                 num_class = 3, verbose = 0,
                 num_parallel_tree = 4, subsample = 0.5, colsample_bytree = 0.5)
  expect_equal(bst$niter, 15)
  expect_equal(xgb.ntree(bst), 15 * 3 * 4)
@@ -274,7 +276,7 @@ test_that("xgb.cv works", {
  expect_output(
    cv <- xgb.cv(data = train$data, label = train$label, max_depth = 2, nfold = 5,
                 eta = 1., nthread = 2, nrounds = 2, objective = "binary:logistic",
-                 verbose = TRUE)
+                 eval_metric = "error", verbose = TRUE)
  , "train-error:")
  expect_is(cv, 'xgb.cv.synchronous')
  expect_false(is.null(cv$evaluation_log))
@@ -299,7 +301,7 @@ test_that("xgb.cv works with stratified folds", {
                eta = 1., nthread = 2, nrounds = 2, objective = "binary:logistic",
                verbose = TRUE, stratified = TRUE)
  # Stratified folds should result in a different evaluation logs
-  expect_true(all(cv$evaluation_log[, test_error_mean] != cv2$evaluation_log[, test_error_mean]))
+  expect_true(all(cv$evaluation_log[, test_logloss_mean] != cv2$evaluation_log[, test_logloss_mean]))
 })

 test_that("train and predict with non-strict classes", {
--- a/R-package/tests/testthat/test_callbacks.R
+++ b/R-package/tests/testthat/test_callbacks.R
@@ -26,7 +26,8 @@ watchlist <- list(train = dtrain, test = dtest)

 err <- function(label, pr) sum((pr > 0.5) != label) / length(label)

-param <- list(objective = "binary:logistic", max_depth = 2, nthread = 2)
+param <- list(objective = "binary:logistic", eval_metric = "error",
+              max_depth = 2, nthread = 2)


 test_that("cb.print.evaluation works as expected", {
@@ -105,7 +106,8 @@ test_that("cb.evaluation.log works as expected", {
 })


-param <- list(objective = "binary:logistic", max_depth = 4, nthread = 2)
+param <- list(objective = "binary:logistic", eval_metric = "error",
+              max_depth = 4, nthread = 2)

 test_that("can store evaluation_log without printing", {
  expect_silent(
@@ -236,7 +238,7 @@ test_that("early stopping xgb.train works", {
 test_that("early stopping using a specific metric works", {
  set.seed(11)
  expect_output(
-    bst <- xgb.train(param, dtrain, nrounds = 20, watchlist, eta = 0.6,
+    bst <- xgb.train(param[-2], dtrain, nrounds = 20, watchlist, eta = 0.6,
                     eval_metric = "logloss", eval_metric = "auc",
                     callbacks = list(cb.early.stop(stopping_rounds = 3, maximize = FALSE,
                                                    metric_name = 'test_logloss')))
--- a/R-package/tests/testthat/test_glm.R
+++ b/R-package/tests/testthat/test_glm.R
@@ -8,7 +8,7 @@ test_that("gblinear works", {
  dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
  dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)

-  param <- list(objective = "binary:logistic", booster = "gblinear",
+  param <- list(objective = "binary:logistic", eval_metric = "error", booster = "gblinear",
                nthread = 2, eta = 0.8, alpha = 0.0001, lambda = 0.0001)
  watchlist <- list(eval = dtest, train = dtrain)