From 98be9aef9aab76ca8e1ca6c250c565dcdbcb7be0 Mon Sep 17 00:00:00 2001
From: Tong He
Date: Sat, 27 Jan 2018 17:06:28 -0800
Subject: [PATCH] A fix for CRAN submission of version 0.7-0 (#3061)

* modify test_helper.R

* fix noLD

* update desc

* fix solaris test

* fix desc

* improve fix

* fix url
---
 R-package/DESCRIPTION                   |  4 ++--
 R-package/R/xgb.train.R                 |  6 +++---
 R-package/man/xgb.train.Rd              |  2 +-
 R-package/src/init.c                    |  8 ++++----
 R-package/tests/testthat/test_basic.R   |  7 +++---
 R-package/tests/testthat/test_helpers.R | 32 +++++++++++++++----------
 6 files changed, 34 insertions(+), 25 deletions(-)

diff --git a/R-package/DESCRIPTION b/R-package/DESCRIPTION
index b5446d3b7..3fd72c27b 100644
--- a/R-package/DESCRIPTION
+++ b/R-package/DESCRIPTION
@@ -1,8 +1,8 @@
 Package: xgboost
 Type: Package
 Title: Extreme Gradient Boosting
-Version: 0.6.4.8
-Date: 2017-12-05
+Version: 0.7.0
+Date: 2018-01-22
 Author: Tianqi Chen , Tong He ,
     Michael Benesty , Vadim Khotilovich ,
     Yuan Tang
diff --git a/R-package/R/xgb.train.R b/R-package/R/xgb.train.R
index 73b79bebc..ab39fa0e9 100644
--- a/R-package/R/xgb.train.R
+++ b/R-package/R/xgb.train.R
@@ -121,7 +121,7 @@
 #' \itemize{
 #'   \item \code{rmse} root mean square error. \url{http://en.wikipedia.org/wiki/Root_mean_square_error}
 #'   \item \code{logloss} negative log-likelihood. \url{http://en.wikipedia.org/wiki/Log-likelihood}
-#'   \item \code{mlogloss} multiclass logloss. \url{https://www.kaggle.com/wiki/MultiClassLogLoss/}
+#'   \item \code{mlogloss} multiclass logloss. \url{http://wiki.fast.ai/index.php/Log_Loss}
 #'   \item \code{error} Binary classification error rate. It is calculated as \code{(# wrong cases) / (# all cases)}.
 #'         By default, it uses the 0.5 threshold for predicted values to define negative and positive instances.
 #'         Different threshold (e.g., 0.) could be specified as "error@0."
@@ -351,8 +351,8 @@ xgb.train <- function(params = list(), data, nrounds, watchlist = list(),
   if (inherits(xgb_model, 'xgb.Booster') &&
       !is_update &&
       !is.null(xgb_model$evaluation_log) &&
-      all.equal(colnames(evaluation_log),
-                colnames(xgb_model$evaluation_log))) {
+      isTRUE(all.equal(colnames(evaluation_log),
+                       colnames(xgb_model$evaluation_log)))) {
     evaluation_log <- rbindlist(list(xgb_model$evaluation_log, evaluation_log))
   }
   bst$evaluation_log <- evaluation_log
diff --git a/R-package/man/xgb.train.Rd b/R-package/man/xgb.train.Rd
index e4e4c145f..a4776f4fd 100644
--- a/R-package/man/xgb.train.Rd
+++ b/R-package/man/xgb.train.Rd
@@ -179,7 +179,7 @@ The folloiwing is the list of built-in metrics for which Xgboost provides optimi
 \itemize{
   \item \code{rmse} root mean square error. \url{http://en.wikipedia.org/wiki/Root_mean_square_error}
   \item \code{logloss} negative log-likelihood. \url{http://en.wikipedia.org/wiki/Log-likelihood}
-  \item \code{mlogloss} multiclass logloss. \url{https://www.kaggle.com/wiki/MultiClassLogLoss/}
+  \item \code{mlogloss} multiclass logloss. \url{http://wiki.fast.ai/index.php/Log_Loss}
   \item \code{error} Binary classification error rate. It is calculated as \code{(# wrong cases) / (# all cases)}.
        By default, it uses the 0.5 threshold for predicted values to define negative and positive instances.
        Different threshold (e.g., 0.) could be specified as "error@0."
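The isTRUE() wrapper added in the xgb.train.R hunk fixes a subtle R pitfall rather than a style issue: all.equal() returns TRUE when its arguments match, but on a mismatch it returns a character vector describing the differences, never FALSE, so using it bare inside if() or && can error out or take the wrong branch. A minimal standalone sketch of the behaviour (illustrative only, not part of the patch):

    # all.equal() is not a TRUE/FALSE predicate: on mismatch it returns a
    # textual description of the differences rather than FALSE.
    a <- c("iter", "train_rmse")
    b <- c("iter", "test_rmse")
    all.equal(a, b)          # "1 string mismatch" -- a character vector
    isTRUE(all.equal(a, b))  # FALSE -- safe to use inside if() and &&
    isTRUE(all.equal(a, a))  # TRUE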
diff --git a/R-package/src/init.c b/R-package/src/init.c
index c07fa99eb..bd493a2af 100644
--- a/R-package/src/init.c
+++ b/R-package/src/init.c
@@ -19,10 +19,10 @@ extern SEXP XGBoosterBoostOneIter_R(SEXP, SEXP, SEXP, SEXP);
 extern SEXP XGBoosterCreate_R(SEXP);
 extern SEXP XGBoosterDumpModel_R(SEXP, SEXP, SEXP, SEXP);
 extern SEXP XGBoosterEvalOneIter_R(SEXP, SEXP, SEXP, SEXP);
-extern SEXP XGBoosterGetAttr_R(SEXP, SEXP);
 extern SEXP XGBoosterGetAttrNames_R(SEXP);
-extern SEXP XGBoosterLoadModel_R(SEXP, SEXP);
+extern SEXP XGBoosterGetAttr_R(SEXP, SEXP);
 extern SEXP XGBoosterLoadModelFromRaw_R(SEXP, SEXP);
+extern SEXP XGBoosterLoadModel_R(SEXP, SEXP);
 extern SEXP XGBoosterModelToRaw_R(SEXP);
 extern SEXP XGBoosterPredict_R(SEXP, SEXP, SEXP, SEXP);
 extern SEXP XGBoosterSaveModel_R(SEXP, SEXP);
@@ -45,10 +45,10 @@ static const R_CallMethodDef CallEntries[] = {
   {"XGBoosterCreate_R",           (DL_FUNC) &XGBoosterCreate_R,           1},
   {"XGBoosterDumpModel_R",        (DL_FUNC) &XGBoosterDumpModel_R,        4},
   {"XGBoosterEvalOneIter_R",      (DL_FUNC) &XGBoosterEvalOneIter_R,      4},
-  {"XGBoosterGetAttr_R",          (DL_FUNC) &XGBoosterGetAttr_R,          2},
   {"XGBoosterGetAttrNames_R",     (DL_FUNC) &XGBoosterGetAttrNames_R,     1},
-  {"XGBoosterLoadModel_R",        (DL_FUNC) &XGBoosterLoadModel_R,        2},
+  {"XGBoosterGetAttr_R",          (DL_FUNC) &XGBoosterGetAttr_R,          2},
   {"XGBoosterLoadModelFromRaw_R", (DL_FUNC) &XGBoosterLoadModelFromRaw_R, 2},
+  {"XGBoosterLoadModel_R",        (DL_FUNC) &XGBoosterLoadModel_R,        2},
   {"XGBoosterModelToRaw_R",       (DL_FUNC) &XGBoosterModelToRaw_R,       1},
   {"XGBoosterPredict_R",          (DL_FUNC) &XGBoosterPredict_R,          4},
   {"XGBoosterSaveModel_R",        (DL_FUNC) &XGBoosterSaveModel_R,        2},
diff --git a/R-package/tests/testthat/test_basic.R b/R-package/tests/testthat/test_basic.R
index 3b11a0614..e7a6679d3 100644
--- a/R-package/tests/testthat/test_basic.R
+++ b/R-package/tests/testthat/test_basic.R
@@ -11,6 +11,7 @@ set.seed(1994)
 
 # disable some tests for Win32
 windows_flag = .Platform$OS.type == "windows" && .Machine$sizeof.pointer != 8
+solaris_flag = (Sys.info()['sysname'] == "SunOS")
 
 test_that("train and predict binary classification", {
   nrounds = 2
@@ -152,20 +153,20 @@ test_that("training continuation works", {
   bst1 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0)
   # continue for two more:
   bst2 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0, xgb_model = bst1)
-  if (!windows_flag)
+  if (!windows_flag && !solaris_flag)
     expect_equal(bst$raw, bst2$raw)
   expect_false(is.null(bst2$evaluation_log))
   expect_equal(dim(bst2$evaluation_log), c(4, 2))
   expect_equal(bst2$evaluation_log, bst$evaluation_log)
   # test continuing from raw model data
   bst2 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0, xgb_model = bst1$raw)
-  if (!windows_flag)
+  if (!windows_flag && !solaris_flag)
     expect_equal(bst$raw, bst2$raw)
   expect_equal(dim(bst2$evaluation_log), c(2, 2))
   # test continuing from a model in file
   xgb.save(bst1, "xgboost.model")
   bst2 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0, xgb_model = "xgboost.model")
-  if (!windows_flag)
+  if (!windows_flag && !solaris_flag)
     expect_equal(bst$raw, bst2$raw)
   expect_equal(dim(bst2$evaluation_log), c(2, 2))
 })
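The init.c hunks above change no behaviour: they only reorder the extern declarations and CallEntries rows, which appears to match the byte-wise (C locale) sort order produced by tools::package_native_routine_registration_skeleton(), where '_' sorts after letters and hence XGBoosterGetAttrNames_R precedes XGBoosterGetAttr_R. The test_basic.R hunks extend the existing Win32 guard to Solaris, presumably because continued training can yield byte-level differences in the serialized model there even when it is numerically equivalent. A sketch of the same gating pattern (illustrative only; the message() call is not in the test suite):

    # Platform flags mirroring the test suite: skip brittle bitwise
    # model comparisons on 32-bit Windows and on Solaris (SunOS).
    windows_flag <- .Platform$OS.type == "windows" && .Machine$sizeof.pointer != 8
    solaris_flag <- Sys.info()["sysname"] == "SunOS"
    if (!windows_flag && !solaris_flag) {
      message("bitwise model comparisons enabled on this platform")
    }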
diff --git a/R-package/tests/testthat/test_helpers.R b/R-package/tests/testthat/test_helpers.R
index 67f4ab40b..2caf4bbc1 100644
--- a/R-package/tests/testthat/test_helpers.R
+++ b/R-package/tests/testthat/test_helpers.R
@@ -5,6 +5,8 @@ require(data.table)
 require(Matrix)
 require(vcd, quietly = TRUE)
 
+float_tolerance = 5e-6
+
 set.seed(1982)
 data(Arthritis)
 df <- data.table(Arthritis, keep.rownames = F)
@@ -85,7 +87,8 @@ test_that("predict feature contributions works", {
   X <- sparse_matrix
   colnames(X) <- NULL
   expect_error(pred_contr_ <- predict(bst.Tree, X, predcontrib = TRUE), regexp = NA)
-  expect_equal(pred_contr, pred_contr_, check.attributes = FALSE)
+  expect_equal(pred_contr, pred_contr_, check.attributes = FALSE,
+               tolerance = float_tolerance)
 
   # gbtree binary classifier (approximate method)
   expect_error(pred_contr <- predict(bst.Tree, sparse_matrix, predcontrib = TRUE, approxcontrib = TRUE), regexp = NA)
@@ -104,7 +107,8 @@ test_that("predict feature contributions works", {
   coefs <- xgb.dump(bst.GLM)[-c(1,2,4)] %>% as.numeric
   coefs <- c(coefs[-1], coefs[1]) # intercept must be the last
   pred_contr_manual <- sweep(cbind(sparse_matrix, 1), 2, coefs, FUN="*")
-  expect_equal(as.numeric(pred_contr), as.numeric(pred_contr_manual), 1e-5)
+  expect_equal(as.numeric(pred_contr), as.numeric(pred_contr_manual),
+               tolerance = float_tolerance)
 
   # gbtree multiclass
   pred <- predict(mbst.Tree, as.matrix(iris[, -5]), outputmargin = TRUE, reshape = TRUE)
@@ -123,11 +127,12 @@ test_that("predict feature contributions works", {
   coefs_all <- xgb.dump(mbst.GLM)[-c(1,2,6)] %>% as.numeric %>% matrix(ncol = 3, byrow = TRUE)
   for (g in seq_along(pred_contr)) {
     expect_equal(colnames(pred_contr[[g]]), c(colnames(iris[, -5]), "BIAS"))
-    expect_lt(max(abs(rowSums(pred_contr[[g]]) - pred[, g])), 2e-6)
+    expect_lt(max(abs(rowSums(pred_contr[[g]]) - pred[, g])), float_tolerance)
     # manual calculation of linear terms
     coefs <- c(coefs_all[-1, g], coefs_all[1, g]) # intercept needs to be the last
     pred_contr_manual <- sweep(as.matrix(cbind(iris[,-5], 1)), 2, coefs, FUN="*")
-    expect_equal(as.numeric(pred_contr[[g]]), as.numeric(pred_contr_manual), 2e-6)
+    expect_equal(as.numeric(pred_contr[[g]]), as.numeric(pred_contr_manual),
+                 tolerance = float_tolerance)
   }
 })
 
@@ -171,14 +176,16 @@ if (grepl('Windows', Sys.info()[['sysname']]) ||
   # check that lossless conversion works with 17 digits
   # numeric -> character -> numeric
   X <- 10^runif(100, -20, 20)
-  X2X <- as.numeric(format(X, digits = 17))
-  expect_identical(X, X2X)
+  if (capabilities('long.double')) {
+    X2X <- as.numeric(format(X, digits = 17))
+    expect_identical(X, X2X)
+  }
   # retrieved attributes to be the same as written
   for (x in X) {
     xgb.attr(bst.Tree, "x") <- x
-    expect_identical(as.numeric(xgb.attr(bst.Tree, "x")), x)
+    expect_equal(as.numeric(xgb.attr(bst.Tree, "x")), x, tolerance = float_tolerance)
     xgb.attributes(bst.Tree) <- list(a = "A", b = x)
-    expect_identical(as.numeric(xgb.attr(bst.Tree, "b")), x)
+    expect_equal(as.numeric(xgb.attr(bst.Tree, "b")), x, tolerance = float_tolerance)
   }
 })
 }
@@ -187,7 +194,7 @@ test_that("xgb.Booster serializing as R object works", {
   saveRDS(bst.Tree, 'xgb.model.rds')
   bst <- readRDS('xgb.model.rds')
   dtrain <- xgb.DMatrix(sparse_matrix, label = label)
-  expect_equal(predict(bst.Tree, dtrain), predict(bst, dtrain))
+  expect_equal(predict(bst.Tree, dtrain), predict(bst, dtrain), tolerance = float_tolerance)
   expect_equal(xgb.dump(bst.Tree), xgb.dump(bst))
   xgb.save(bst, 'xgb.model')
   nil_ptr <- new("externalptr")
@@ -195,7 +202,7 @@
   expect_true(identical(bst$handle, nil_ptr))
   bst <- xgb.Booster.complete(bst)
   expect_true(!identical(bst$handle, nil_ptr))
-  expect_equal(predict(bst.Tree, dtrain), predict(bst, dtrain))
+  expect_equal(predict(bst.Tree, dtrain), predict(bst, dtrain), tolerance = float_tolerance)
 })
 
 test_that("xgb.model.dt.tree works with and without feature names", {
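The capabilities('long.double') guard above is what resolves the "fix noLD" commit: CRAN runs an additional check with R configured via --disable-long-double, and without long doubles a 17-significant-digit decimal round trip of a double is not guaranteed to be exact, so the lossless-conversion assertion only runs where long doubles are available. A sketch of the same guard outside testthat (illustrative only; the 5e-6 fallback mirrors float_tolerance):

    # Exact numeric -> character -> numeric round trip is only asserted
    # when long doubles are available (i.e., not on CRAN's noLD builds).
    x <- 10^runif(5, -20, 20)
    x2x <- as.numeric(format(x, digits = 17))
    if (capabilities("long.double")) {
      stopifnot(identical(x, x2x))
    } else {
      stopifnot(isTRUE(all.equal(x, x2x, tolerance = 5e-6)))
    }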
@@ -233,13 +240,14 @@ test_that("xgb.importance works with and without feature names", {
   expect_output(str(importance.Tree), 'Feature.*\\"Age\\"')
 
   importance.Tree.0 <- xgb.importance(model = bst.Tree)
-  expect_equal(importance.Tree, importance.Tree.0)
+  expect_equal(importance.Tree, importance.Tree.0, tolerance = float_tolerance)
 
   # when model contains no feature names:
   bst.Tree.x <- bst.Tree
   bst.Tree.x$feature_names <- NULL
   importance.Tree.x <- xgb.importance(model = bst.Tree)
-  expect_equal(importance.Tree[, -1, with=FALSE], importance.Tree.x[, -1, with=FALSE])
+  expect_equal(importance.Tree[, -1, with=FALSE], importance.Tree.x[, -1, with=FALSE],
+               tolerance = float_tolerance)
 
   imp2plot <- xgb.plot.importance(importance_matrix = importance.Tree)
   expect_equal(colnames(imp2plot), c("Feature", "Gain", "Cover", "Frequency", "Importance"))
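Throughout these test hunks, exact comparisons (expect_identical, or expect_equal at its default tolerance) are relaxed to tolerance = float_tolerance because xgboost performs much of its arithmetic in single-precision floats. At the time, testthat::expect_equal() delegated numeric comparison to all.equal(), whose tolerance bounds the mean relative difference, so 5e-6 comfortably absorbs float rounding (relative error up to about 1.2e-7) without masking real regressions. A quick illustration (not part of the patch):

    x <- 1e6
    y <- x * (1 + 1e-7)                        # float-sized relative error
    isTRUE(all.equal(x, y))                    # FALSE: default tolerance is ~1.5e-8
    isTRUE(all.equal(x, y, tolerance = 5e-6))  # TRUE : within the relaxed bound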