A fix for CRAN submission of version 0.7-0 (#3061)

* modify test_helper.R

* fix noLD

* update desc

* fix solaris test

* fix desc

* improve fix

* fix url
Tong He 2018-01-27 17:06:28 -08:00 committed by GitHub
parent c88bae112e
commit 98be9aef9a
6 changed files with 34 additions and 25 deletions

View File

@@ -1,8 +1,8 @@
 Package: xgboost
 Type: Package
 Title: Extreme Gradient Boosting
-Version: 0.6.4.8
-Date: 2017-12-05
+Version: 0.7.0
+Date: 2018-01-22
 Author: Tianqi Chen <tianqi.tchen@gmail.com>, Tong He <hetong007@gmail.com>,
     Michael Benesty <michael@benesty.fr>, Vadim Khotilovich <khotilovich@gmail.com>,
     Yuan Tang <terrytangyuan@gmail.com>

View File

@@ -121,7 +121,7 @@
 #' \itemize{
 #' \item \code{rmse} root mean square error. \url{http://en.wikipedia.org/wiki/Root_mean_square_error}
 #' \item \code{logloss} negative log-likelihood. \url{http://en.wikipedia.org/wiki/Log-likelihood}
-#' \item \code{mlogloss} multiclass logloss. \url{https://www.kaggle.com/wiki/MultiClassLogLoss/}
+#' \item \code{mlogloss} multiclass logloss. \url{http://wiki.fast.ai/index.php/Log_Loss}
 #' \item \code{error} Binary classification error rate. It is calculated as \code{(# wrong cases) / (# all cases)}.
 #' By default, it uses the 0.5 threshold for predicted values to define negative and positive instances.
 #' Different threshold (e.g., 0.) could be specified as "error@0."
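Note: the "error@t" form documented above lets the binary error metric use a custom probability threshold instead of 0.5. A minimal sketch of passing it through `eval_metric` (the dataset and the 0.6 threshold are illustrative, not from this commit):

```r
# Illustrative only: request the binary error metric with a 0.6 threshold
# using the "error@t" syntax described in the documentation above.
library(xgboost)
data(agaricus.train, package = "xgboost")
bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label,
               nrounds = 2, objective = "binary:logistic",
               eval_metric = "error@0.6", verbose = 0)
```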
@@ -351,8 +351,8 @@ xgb.train <- function(params = list(), data, nrounds, watchlist = list(),
 if (inherits(xgb_model, 'xgb.Booster') &&
 !is_update &&
 !is.null(xgb_model$evaluation_log) &&
-all.equal(colnames(evaluation_log),
-colnames(xgb_model$evaluation_log))) {
+isTRUE(all.equal(colnames(evaluation_log),
+colnames(xgb_model$evaluation_log)))) {
 evaluation_log <- rbindlist(list(xgb_model$evaluation_log, evaluation_log))
 }
 bst$evaluation_log <- evaluation_log
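Note: the `isTRUE()` wrapper matters because `all.equal()` does not return `FALSE` on a mismatch; it returns a character vector describing the differences, which is not usable as a condition. A small standalone illustration (not code from this commit):

```r
# all.equal() reports differences as text rather than returning FALSE:
all.equal(c("a", "b"), c("a", "c"))
#> [1] "1 string mismatch"

# isTRUE() collapses that result to a single logical, making it safe in `if`/`&&`:
isTRUE(all.equal(c("a", "b"), c("a", "c")))
#> [1] FALSE
```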

View File

@@ -179,7 +179,7 @@ The folloiwing is the list of built-in metrics for which Xgboost provides optimi
 \itemize{
 \item \code{rmse} root mean square error. \url{http://en.wikipedia.org/wiki/Root_mean_square_error}
 \item \code{logloss} negative log-likelihood. \url{http://en.wikipedia.org/wiki/Log-likelihood}
-\item \code{mlogloss} multiclass logloss. \url{https://www.kaggle.com/wiki/MultiClassLogLoss/}
+\item \code{mlogloss} multiclass logloss. \url{http://wiki.fast.ai/index.php/Log_Loss}
 \item \code{error} Binary classification error rate. It is calculated as \code{(# wrong cases) / (# all cases)}.
 By default, it uses the 0.5 threshold for predicted values to define negative and positive instances.
 Different threshold (e.g., 0.) could be specified as "error@0."

View File

@@ -19,10 +19,10 @@ extern SEXP XGBoosterBoostOneIter_R(SEXP, SEXP, SEXP, SEXP);
 extern SEXP XGBoosterCreate_R(SEXP);
 extern SEXP XGBoosterDumpModel_R(SEXP, SEXP, SEXP, SEXP);
 extern SEXP XGBoosterEvalOneIter_R(SEXP, SEXP, SEXP, SEXP);
-extern SEXP XGBoosterGetAttr_R(SEXP, SEXP);
 extern SEXP XGBoosterGetAttrNames_R(SEXP);
-extern SEXP XGBoosterLoadModel_R(SEXP, SEXP);
+extern SEXP XGBoosterGetAttr_R(SEXP, SEXP);
 extern SEXP XGBoosterLoadModelFromRaw_R(SEXP, SEXP);
+extern SEXP XGBoosterLoadModel_R(SEXP, SEXP);
 extern SEXP XGBoosterModelToRaw_R(SEXP);
 extern SEXP XGBoosterPredict_R(SEXP, SEXP, SEXP, SEXP);
 extern SEXP XGBoosterSaveModel_R(SEXP, SEXP);
@@ -45,10 +45,10 @@ static const R_CallMethodDef CallEntries[] = {
 {"XGBoosterCreate_R", (DL_FUNC) &XGBoosterCreate_R, 1},
 {"XGBoosterDumpModel_R", (DL_FUNC) &XGBoosterDumpModel_R, 4},
 {"XGBoosterEvalOneIter_R", (DL_FUNC) &XGBoosterEvalOneIter_R, 4},
-{"XGBoosterGetAttr_R", (DL_FUNC) &XGBoosterGetAttr_R, 2},
 {"XGBoosterGetAttrNames_R", (DL_FUNC) &XGBoosterGetAttrNames_R, 1},
-{"XGBoosterLoadModel_R", (DL_FUNC) &XGBoosterLoadModel_R, 2},
+{"XGBoosterGetAttr_R", (DL_FUNC) &XGBoosterGetAttr_R, 2},
 {"XGBoosterLoadModelFromRaw_R", (DL_FUNC) &XGBoosterLoadModelFromRaw_R, 2},
+{"XGBoosterLoadModel_R", (DL_FUNC) &XGBoosterLoadModel_R, 2},
 {"XGBoosterModelToRaw_R", (DL_FUNC) &XGBoosterModelToRaw_R, 1},
 {"XGBoosterPredict_R", (DL_FUNC) &XGBoosterPredict_R, 4},
 {"XGBoosterSaveModel_R", (DL_FUNC) &XGBoosterSaveModel_R, 2},

View File

@@ -11,6 +11,7 @@ set.seed(1994)
 # disable some tests for Win32
 windows_flag = .Platform$OS.type == "windows" &&
 .Machine$sizeof.pointer != 8
+solaris_flag = (Sys.info()['sysname'] == "SunOS")
 test_that("train and predict binary classification", {
 nrounds = 2
@@ -152,20 +153,20 @@ test_that("training continuation works", {
 bst1 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0)
 # continue for two more:
 bst2 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0, xgb_model = bst1)
-if (!windows_flag)
+if (!windows_flag && !solaris_flag)
 expect_equal(bst$raw, bst2$raw)
 expect_false(is.null(bst2$evaluation_log))
 expect_equal(dim(bst2$evaluation_log), c(4, 2))
 expect_equal(bst2$evaluation_log, bst$evaluation_log)
 # test continuing from raw model data
 bst2 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0, xgb_model = bst1$raw)
-if (!windows_flag)
+if (!windows_flag && !solaris_flag)
 expect_equal(bst$raw, bst2$raw)
 expect_equal(dim(bst2$evaluation_log), c(2, 2))
 # test continuing from a model in file
 xgb.save(bst1, "xgboost.model")
 bst2 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0, xgb_model = "xgboost.model")
-if (!windows_flag)
+if (!windows_flag && !solaris_flag)
 expect_equal(bst$raw, bst2$raw)
 expect_equal(dim(bst2$evaluation_log), c(2, 2))
 })
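Note: the new `solaris_flag` guard, together with the existing `windows_flag`, skips the byte-for-byte comparison of serialized models on platforms where floating-point behavior makes it unreliable. Condensed, the guard pattern used in this test is simply:

```r
# Platform flags as defined near the top of this test file:
windows_flag <- .Platform$OS.type == "windows" && .Machine$sizeof.pointer != 8
solaris_flag <- Sys.info()[["sysname"]] == "SunOS"

# Only require bst$raw and bst2$raw to be identical where that check is reliable:
strict_raw_check <- !windows_flag && !solaris_flag
```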

View File

@@ -5,6 +5,8 @@ require(data.table)
 require(Matrix)
 require(vcd, quietly = TRUE)
+float_tolerance = 5e-6
 set.seed(1982)
 data(Arthritis)
 df <- data.table(Arthritis, keep.rownames = F)
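Note: `float_tolerance` replaces exact comparisons throughout this file because the CRAN noLD checks (R built without long-double support) and other platforms can produce results that differ in the last few bits. A standalone illustration of comparing within a tolerance rather than bitwise (values are illustrative):

```r
library(testthat)
float_tolerance <- 5e-6

a <- 0.3
b <- 0.1 + 0.2   # differs from 0.3 by ~5.6e-17 in double precision
# expect_identical(a, b)                         # would fail: not bitwise equal
expect_equal(a, b, tolerance = float_tolerance)  # passes: equal within tolerance
```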
@@ -85,7 +87,8 @@ test_that("predict feature contributions works", {
 X <- sparse_matrix
 colnames(X) <- NULL
 expect_error(pred_contr_ <- predict(bst.Tree, X, predcontrib = TRUE), regexp = NA)
-expect_equal(pred_contr, pred_contr_, check.attributes = FALSE)
+expect_equal(pred_contr, pred_contr_, check.attributes = FALSE,
+tolerance = float_tolerance)
 # gbtree binary classifier (approximate method)
 expect_error(pred_contr <- predict(bst.Tree, sparse_matrix, predcontrib = TRUE, approxcontrib = TRUE), regexp = NA)
@@ -104,7 +107,8 @@ test_that("predict feature contributions works", {
 coefs <- xgb.dump(bst.GLM)[-c(1,2,4)] %>% as.numeric
 coefs <- c(coefs[-1], coefs[1]) # intercept must be the last
 pred_contr_manual <- sweep(cbind(sparse_matrix, 1), 2, coefs, FUN="*")
-expect_equal(as.numeric(pred_contr), as.numeric(pred_contr_manual), 1e-5)
+expect_equal(as.numeric(pred_contr), as.numeric(pred_contr_manual),
+tolerance = float_tolerance)
 # gbtree multiclass
 pred <- predict(mbst.Tree, as.matrix(iris[, -5]), outputmargin = TRUE, reshape = TRUE)
@@ -123,11 +127,12 @@ test_that("predict feature contributions works", {
 coefs_all <- xgb.dump(mbst.GLM)[-c(1,2,6)] %>% as.numeric %>% matrix(ncol = 3, byrow = TRUE)
 for (g in seq_along(pred_contr)) {
 expect_equal(colnames(pred_contr[[g]]), c(colnames(iris[, -5]), "BIAS"))
-expect_lt(max(abs(rowSums(pred_contr[[g]]) - pred[, g])), 2e-6)
+expect_lt(max(abs(rowSums(pred_contr[[g]]) - pred[, g])), float_tolerance)
 # manual calculation of linear terms
 coefs <- c(coefs_all[-1, g], coefs_all[1, g]) # intercept needs to be the last
 pred_contr_manual <- sweep(as.matrix(cbind(iris[,-5], 1)), 2, coefs, FUN="*")
-expect_equal(as.numeric(pred_contr[[g]]), as.numeric(pred_contr_manual), 2e-6)
+expect_equal(as.numeric(pred_contr[[g]]), as.numeric(pred_contr_manual),
+tolerance = float_tolerance)
 }
 })
@@ -171,14 +176,16 @@ if (grepl('Windows', Sys.info()[['sysname']]) ||
 # check that lossless conversion works with 17 digits
 # numeric -> character -> numeric
 X <- 10^runif(100, -20, 20)
+if (capabilities('long.double')) {
 X2X <- as.numeric(format(X, digits = 17))
 expect_identical(X, X2X)
+}
 # retrieved attributes to be the same as written
 for (x in X) {
 xgb.attr(bst.Tree, "x") <- x
-expect_identical(as.numeric(xgb.attr(bst.Tree, "x")), x)
+expect_equal(as.numeric(xgb.attr(bst.Tree, "x")), x, tolerance = float_tolerance)
 xgb.attributes(bst.Tree) <- list(a = "A", b = x)
-expect_identical(as.numeric(xgb.attr(bst.Tree, "b")), x)
+expect_equal(as.numeric(xgb.attr(bst.Tree, "b")), x, tolerance = float_tolerance)
 }
 })
 }
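Note: the `capabilities('long.double')` wrapper keeps the exact 17-digit round-trip check off builds without long-double support (the CRAN noLD checks), where that identity is not guaranteed. A minimal standalone sketch of the same guard:

```r
# Only insist on the exact numeric -> character -> numeric round trip when the
# R build uses long doubles; 17 significant digits uniquely identify a double.
if (capabilities("long.double")) {
  x <- 10^runif(10, -20, 20)
  stopifnot(identical(x, as.numeric(format(x, digits = 17))))
}
```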
@@ -187,7 +194,7 @@ test_that("xgb.Booster serializing as R object works", {
 saveRDS(bst.Tree, 'xgb.model.rds')
 bst <- readRDS('xgb.model.rds')
 dtrain <- xgb.DMatrix(sparse_matrix, label = label)
-expect_equal(predict(bst.Tree, dtrain), predict(bst, dtrain))
+expect_equal(predict(bst.Tree, dtrain), predict(bst, dtrain), tolerance = float_tolerance)
 expect_equal(xgb.dump(bst.Tree), xgb.dump(bst))
 xgb.save(bst, 'xgb.model')
 nil_ptr <- new("externalptr")
@@ -195,7 +202,7 @@ test_that("xgb.Booster serializing as R object works", {
 expect_true(identical(bst$handle, nil_ptr))
 bst <- xgb.Booster.complete(bst)
 expect_true(!identical(bst$handle, nil_ptr))
-expect_equal(predict(bst.Tree, dtrain), predict(bst, dtrain))
+expect_equal(predict(bst.Tree, dtrain), predict(bst, dtrain), tolerance = float_tolerance)
 })
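Note: a booster restored with `readRDS()` carries a nil external pointer for its C++ handle; `xgb.Booster.complete()` rebuilds the handle from the stored raw model, which is what the assertions above exercise. A hedged sketch of that round trip (file name and data are illustrative):

```r
library(xgboost)
data(agaricus.train, package = "xgboost")
bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label,
               nrounds = 2, objective = "binary:logistic", verbose = 0)

saveRDS(bst, "xgb.model.rds")        # serialize the booster as an ordinary R object
bst2 <- readRDS("xgb.model.rds")     # the C++ handle comes back as a nil pointer
bst2 <- xgb.Booster.complete(bst2)   # restore the handle from the stored raw model
max(abs(predict(bst, agaricus.train$data) -
        predict(bst2, agaricus.train$data)))  # should be ~0 within float tolerance
```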
test_that("xgb.model.dt.tree works with and without feature names", {
@@ -233,13 +240,14 @@ test_that("xgb.importance works with and without feature names", {
 expect_output(str(importance.Tree), 'Feature.*\\"Age\\"')
 importance.Tree.0 <- xgb.importance(model = bst.Tree)
-expect_equal(importance.Tree, importance.Tree.0)
+expect_equal(importance.Tree, importance.Tree.0, tolerance = float_tolerance)
 # when model contains no feature names:
 bst.Tree.x <- bst.Tree
 bst.Tree.x$feature_names <- NULL
 importance.Tree.x <- xgb.importance(model = bst.Tree)
-expect_equal(importance.Tree[, -1, with=FALSE], importance.Tree.x[, -1, with=FALSE])
+expect_equal(importance.Tree[, -1, with=FALSE], importance.Tree.x[, -1, with=FALSE],
+tolerance = float_tolerance)
 imp2plot <- xgb.plot.importance(importance_matrix = importance.Tree)
 expect_equal(colnames(imp2plot), c("Feature", "Gain", "Cover", "Frequency", "Importance"))