From 98be9aef9aab76ca8e1ca6c250c565dcdbcb7be0 Mon Sep 17 00:00:00 2001
From: Tong He
Date: Sat, 27 Jan 2018 17:06:28 -0800
Subject: [PATCH] A fix for CRAN submission of version 0.7-0 (#3061)

* modify test_helper.R

* fix noLD

* update desc

* fix solaris test

* fix desc

* improve fix

* fix url
---
 R-package/DESCRIPTION                   |  4 ++--
 R-package/R/xgb.train.R                 |  6 +++---
 R-package/man/xgb.train.Rd              |  2 +-
 R-package/src/init.c                    |  8 ++++----
 R-package/tests/testthat/test_basic.R   |  7 +++---
 R-package/tests/testthat/test_helpers.R | 32 +++++++++++++++----------
 6 files changed, 34 insertions(+), 25 deletions(-)

diff --git a/R-package/DESCRIPTION b/R-package/DESCRIPTION
index b5446d3b7..3fd72c27b 100644
--- a/R-package/DESCRIPTION
+++ b/R-package/DESCRIPTION
@@ -1,8 +1,8 @@
 Package: xgboost
 Type: Package
 Title: Extreme Gradient Boosting
-Version: 0.6.4.8
-Date: 2017-12-05
+Version: 0.7.0
+Date: 2018-01-22
 Author: Tianqi Chen , Tong He ,
     Michael Benesty , Vadim Khotilovich ,
     Yuan Tang
diff --git a/R-package/R/xgb.train.R b/R-package/R/xgb.train.R
index 73b79bebc..ab39fa0e9 100644
--- a/R-package/R/xgb.train.R
+++ b/R-package/R/xgb.train.R
@@ -121,7 +121,7 @@
 #' \itemize{
 #'   \item \code{rmse} root mean square error. \url{http://en.wikipedia.org/wiki/Root_mean_square_error}
 #'   \item \code{logloss} negative log-likelihood. \url{http://en.wikipedia.org/wiki/Log-likelihood}
-#'   \item \code{mlogloss} multiclass logloss. \url{https://www.kaggle.com/wiki/MultiClassLogLoss/}
+#'   \item \code{mlogloss} multiclass logloss. \url{http://wiki.fast.ai/index.php/Log_Loss}
 #'   \item \code{error} Binary classification error rate. It is calculated as \code{(# wrong cases) / (# all cases)}.
 #'         By default, it uses the 0.5 threshold for predicted values to define negative and positive instances.
 #'         Different threshold (e.g., 0.) could be specified as "error@0."
@@ -351,8 +351,8 @@ xgb.train <- function(params = list(), data, nrounds, watchlist = list(),
   if (inherits(xgb_model, 'xgb.Booster') &&
       !is_update &&
       !is.null(xgb_model$evaluation_log) &&
-      all.equal(colnames(evaluation_log),
-                colnames(xgb_model$evaluation_log))) {
+      isTRUE(all.equal(colnames(evaluation_log),
+                       colnames(xgb_model$evaluation_log)))) {
     evaluation_log <- rbindlist(list(xgb_model$evaluation_log, evaluation_log))
   }
   bst$evaluation_log <- evaluation_log
diff --git a/R-package/man/xgb.train.Rd b/R-package/man/xgb.train.Rd
index e4e4c145f..a4776f4fd 100644
--- a/R-package/man/xgb.train.Rd
+++ b/R-package/man/xgb.train.Rd
@@ -179,7 +179,7 @@ The folloiwing is the list of built-in metrics for which Xgboost provides optimi
 \itemize{
   \item \code{rmse} root mean square error. \url{http://en.wikipedia.org/wiki/Root_mean_square_error}
   \item \code{logloss} negative log-likelihood. \url{http://en.wikipedia.org/wiki/Log-likelihood}
-  \item \code{mlogloss} multiclass logloss. \url{https://www.kaggle.com/wiki/MultiClassLogLoss/}
+  \item \code{mlogloss} multiclass logloss. \url{http://wiki.fast.ai/index.php/Log_Loss}
   \item \code{error} Binary classification error rate. It is calculated as \code{(# wrong cases) / (# all cases)}.
        By default, it uses the 0.5 threshold for predicted values to define negative and positive instances.
        Different threshold (e.g., 0.) could be specified as "error@0."
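The isTRUE() wrapper added in the xgb.train.R hunk fixes a subtle R pitfall rather than a style issue: all.equal() returns TRUE when its arguments match, but on a mismatch it returns a character vector describing the differences, never FALSE, so using it bare inside if() or && can error out or take the wrong branch. A minimal standalone sketch of the behaviour (illustrative only, not part of the patch):

    # all.equal() is not a TRUE/FALSE predicate: on mismatch it returns a
    # textual description of the differences rather than FALSE.
    a <- c("iter", "train_rmse")
    b <- c("iter", "test_rmse")
    all.equal(a, b)          # "1 string mismatch" -- a character vector
    isTRUE(all.equal(a, b))  # FALSE -- safe to use inside if() and &&
    isTRUE(all.equal(a, a))  # TRUE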
diff --git a/R-package/src/init.c b/R-package/src/init.c
index c07fa99eb..bd493a2af 100644
--- a/R-package/src/init.c
+++ b/R-package/src/init.c
@@ -19,10 +19,10 @@ extern SEXP XGBoosterBoostOneIter_R(SEXP, SEXP, SEXP, SEXP);
 extern SEXP XGBoosterCreate_R(SEXP);
 extern SEXP XGBoosterDumpModel_R(SEXP, SEXP, SEXP, SEXP);
 extern SEXP XGBoosterEvalOneIter_R(SEXP, SEXP, SEXP, SEXP);
-extern SEXP XGBoosterGetAttr_R(SEXP, SEXP);
 extern SEXP XGBoosterGetAttrNames_R(SEXP);
-extern SEXP XGBoosterLoadModel_R(SEXP, SEXP);
+extern SEXP XGBoosterGetAttr_R(SEXP, SEXP);
 extern SEXP XGBoosterLoadModelFromRaw_R(SEXP, SEXP);
+extern SEXP XGBoosterLoadModel_R(SEXP, SEXP);
 extern SEXP XGBoosterModelToRaw_R(SEXP);
 extern SEXP XGBoosterPredict_R(SEXP, SEXP, SEXP, SEXP);
 extern SEXP XGBoosterSaveModel_R(SEXP, SEXP);
@@ -45,10 +45,10 @@ static const R_CallMethodDef CallEntries[] = {
   {"XGBoosterCreate_R",           (DL_FUNC) &XGBoosterCreate_R,           1},
   {"XGBoosterDumpModel_R",        (DL_FUNC) &XGBoosterDumpModel_R,        4},
   {"XGBoosterEvalOneIter_R",      (DL_FUNC) &XGBoosterEvalOneIter_R,      4},
-  {"XGBoosterGetAttr_R",          (DL_FUNC) &XGBoosterGetAttr_R,          2},
   {"XGBoosterGetAttrNames_R",     (DL_FUNC) &XGBoosterGetAttrNames_R,     1},
-  {"XGBoosterLoadModel_R",        (DL_FUNC) &XGBoosterLoadModel_R,        2},
+  {"XGBoosterGetAttr_R",          (DL_FUNC) &XGBoosterGetAttr_R,          2},
   {"XGBoosterLoadModelFromRaw_R", (DL_FUNC) &XGBoosterLoadModelFromRaw_R, 2},
+  {"XGBoosterLoadModel_R",        (DL_FUNC) &XGBoosterLoadModel_R,        2},
   {"XGBoosterModelToRaw_R",       (DL_FUNC) &XGBoosterModelToRaw_R,       1},
   {"XGBoosterPredict_R",          (DL_FUNC) &XGBoosterPredict_R,          4},
   {"XGBoosterSaveModel_R",        (DL_FUNC) &XGBoosterSaveModel_R,        2},
diff --git a/R-package/tests/testthat/test_basic.R b/R-package/tests/testthat/test_basic.R
index 3b11a0614..e7a6679d3 100644
--- a/R-package/tests/testthat/test_basic.R
+++ b/R-package/tests/testthat/test_basic.R
@@ -11,6 +11,7 @@ set.seed(1994)
 
 # disable some tests for Win32
 windows_flag = .Platform$OS.type == "windows" && .Machine$sizeof.pointer != 8
+solaris_flag = (Sys.info()['sysname'] == "SunOS")
 
 test_that("train and predict binary classification", {
   nrounds = 2
@@ -152,20 +153,20 @@ test_that("training continuation works", {
   bst1 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0)
   # continue for two more:
   bst2 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0, xgb_model = bst1)
-  if (!windows_flag)
+  if (!windows_flag && !solaris_flag)
     expect_equal(bst$raw, bst2$raw)
   expect_false(is.null(bst2$evaluation_log))
   expect_equal(dim(bst2$evaluation_log), c(4, 2))
   expect_equal(bst2$evaluation_log, bst$evaluation_log)
   # test continuing from raw model data
   bst2 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0, xgb_model = bst1$raw)
-  if (!windows_flag)
+  if (!windows_flag && !solaris_flag)
     expect_equal(bst$raw, bst2$raw)
   expect_equal(dim(bst2$evaluation_log), c(2, 2))
   # test continuing from a model in file
   xgb.save(bst1, "xgboost.model")
   bst2 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0, xgb_model = "xgboost.model")
-  if (!windows_flag)
+  if (!windows_flag && !solaris_flag)
     expect_equal(bst$raw, bst2$raw)
   expect_equal(dim(bst2$evaluation_log), c(2, 2))
 })
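The init.c hunks above change no behaviour: they only reorder the extern declarations and CallEntries rows, which appears to match the byte-wise (C locale) sort order produced by tools::package_native_routine_registration_skeleton(), where '_' sorts after letters and hence XGBoosterGetAttrNames_R precedes XGBoosterGetAttr_R. The test_basic.R hunks extend the existing Win32 guard to Solaris, presumably because continued training can yield byte-level differences in the serialized model there even when it is numerically equivalent. A sketch of the same gating pattern (illustrative only; the message() call is not in the test suite):

    # Platform flags mirroring the test suite: skip brittle bitwise
    # model comparisons on 32-bit Windows and on Solaris (SunOS).
    windows_flag <- .Platform$OS.type == "windows" && .Machine$sizeof.pointer != 8
    solaris_flag <- Sys.info()["sysname"] == "SunOS"
    if (!windows_flag && !solaris_flag) {
      message("bitwise model comparisons enabled on this platform")
    }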
diff --git a/R-package/tests/testthat/test_helpers.R b/R-package/tests/testthat/test_helpers.R
index 67f4ab40b..2caf4bbc1 100644
--- a/R-package/tests/testthat/test_helpers.R
+++ b/R-package/tests/testthat/test_helpers.R
@@ -5,6 +5,8 @@ require(data.table)
 require(Matrix)
 require(vcd, quietly = TRUE)
 
+float_tolerance = 5e-6
+
 set.seed(1982)
 data(Arthritis)
 df <- data.table(Arthritis, keep.rownames = F)
@@ -85,7 +87,8 @@ test_that("predict feature contributions works", {
   X <- sparse_matrix
   colnames(X) <- NULL
   expect_error(pred_contr_ <- predict(bst.Tree, X, predcontrib = TRUE), regexp = NA)
-  expect_equal(pred_contr, pred_contr_, check.attributes = FALSE)
+  expect_equal(pred_contr, pred_contr_, check.attributes = FALSE,
+               tolerance = float_tolerance)
 
   # gbtree binary classifier (approximate method)
   expect_error(pred_contr <- predict(bst.Tree, sparse_matrix, predcontrib = TRUE, approxcontrib = TRUE), regexp = NA)
@@ -104,7 +107,8 @@ test_that("predict feature contributions works", {
   coefs <- xgb.dump(bst.GLM)[-c(1,2,4)] %>% as.numeric
   coefs <- c(coefs[-1], coefs[1]) # intercept must be the last
   pred_contr_manual <- sweep(cbind(sparse_matrix, 1), 2, coefs, FUN="*")
-  expect_equal(as.numeric(pred_contr), as.numeric(pred_contr_manual), 1e-5)
+  expect_equal(as.numeric(pred_contr), as.numeric(pred_contr_manual),
+               tolerance = float_tolerance)
 
   # gbtree multiclass
   pred <- predict(mbst.Tree, as.matrix(iris[, -5]), outputmargin = TRUE, reshape = TRUE)
@@ -123,11 +127,12 @@ test_that("predict feature contributions works", {
   coefs_all <- xgb.dump(mbst.GLM)[-c(1,2,6)] %>% as.numeric %>% matrix(ncol = 3, byrow = TRUE)
   for (g in seq_along(pred_contr)) {
     expect_equal(colnames(pred_contr[[g]]), c(colnames(iris[, -5]), "BIAS"))
-    expect_lt(max(abs(rowSums(pred_contr[[g]]) - pred[, g])), 2e-6)
+    expect_lt(max(abs(rowSums(pred_contr[[g]]) - pred[, g])), float_tolerance)
     # manual calculation of linear terms
     coefs <- c(coefs_all[-1, g], coefs_all[1, g]) # intercept needs to be the last
     pred_contr_manual <- sweep(as.matrix(cbind(iris[,-5], 1)), 2, coefs, FUN="*")
-    expect_equal(as.numeric(pred_contr[[g]]), as.numeric(pred_contr_manual), 2e-6)
+    expect_equal(as.numeric(pred_contr[[g]]), as.numeric(pred_contr_manual),
+                 tolerance = float_tolerance)
   }
 })
 
@@ -171,14 +176,16 @@ if (grepl('Windows', Sys.info()[['sysname']]) ||
   # check that lossless conversion works with 17 digits
   # numeric -> character -> numeric
   X <- 10^runif(100, -20, 20)
-  X2X <- as.numeric(format(X, digits = 17))
-  expect_identical(X, X2X)
+  if (capabilities('long.double')) {
+    X2X <- as.numeric(format(X, digits = 17))
+    expect_identical(X, X2X)
+  }
   # retrieved attributes to be the same as written
   for (x in X) {
     xgb.attr(bst.Tree, "x") <- x
-    expect_identical(as.numeric(xgb.attr(bst.Tree, "x")), x)
+    expect_equal(as.numeric(xgb.attr(bst.Tree, "x")), x, tolerance = float_tolerance)
     xgb.attributes(bst.Tree) <- list(a = "A", b = x)
-    expect_identical(as.numeric(xgb.attr(bst.Tree, "b")), x)
+    expect_equal(as.numeric(xgb.attr(bst.Tree, "b")), x, tolerance = float_tolerance)
   }
 })
 }
@@ -187,7 +194,7 @@ test_that("xgb.Booster serializing as R object works", {
   saveRDS(bst.Tree, 'xgb.model.rds')
   bst <- readRDS('xgb.model.rds')
   dtrain <- xgb.DMatrix(sparse_matrix, label = label)
-  expect_equal(predict(bst.Tree, dtrain), predict(bst, dtrain))
+  expect_equal(predict(bst.Tree, dtrain), predict(bst, dtrain), tolerance = float_tolerance)
   expect_equal(xgb.dump(bst.Tree), xgb.dump(bst))
   xgb.save(bst, 'xgb.model')
   nil_ptr <- new("externalptr")
@@ -195,7 +202,7 @@
   expect_true(identical(bst$handle, nil_ptr))
   bst <- xgb.Booster.complete(bst)
   expect_true(!identical(bst$handle, nil_ptr))
-  expect_equal(predict(bst.Tree, dtrain), predict(bst, dtrain))
+  expect_equal(predict(bst.Tree, dtrain), predict(bst, dtrain), tolerance = float_tolerance)
 })
 
 test_that("xgb.model.dt.tree works with and without feature names", {
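The capabilities('long.double') guard above is what resolves the "fix noLD" commit: CRAN runs an additional check with R configured via --disable-long-double, and without long doubles a 17-significant-digit decimal round trip of a double is not guaranteed to be exact, so the lossless-conversion assertion only runs where long doubles are available. A sketch of the same guard outside testthat (illustrative only; the 5e-6 fallback mirrors float_tolerance):

    # Exact numeric -> character -> numeric round trip is only asserted
    # when long doubles are available (i.e., not on CRAN's noLD builds).
    x <- 10^runif(5, -20, 20)
    x2x <- as.numeric(format(x, digits = 17))
    if (capabilities("long.double")) {
      stopifnot(identical(x, x2x))
    } else {
      stopifnot(isTRUE(all.equal(x, x2x, tolerance = 5e-6)))
    }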
@@ -233,13 +240,14 @@ test_that("xgb.importance works with and without feature names", {
   expect_output(str(importance.Tree), 'Feature.*\\"Age\\"')
 
   importance.Tree.0 <- xgb.importance(model = bst.Tree)
-  expect_equal(importance.Tree, importance.Tree.0)
+  expect_equal(importance.Tree, importance.Tree.0, tolerance = float_tolerance)
 
   # when model contains no feature names:
   bst.Tree.x <- bst.Tree
   bst.Tree.x$feature_names <- NULL
   importance.Tree.x <- xgb.importance(model = bst.Tree)
-  expect_equal(importance.Tree[, -1, with=FALSE], importance.Tree.x[, -1, with=FALSE])
+  expect_equal(importance.Tree[, -1, with=FALSE], importance.Tree.x[, -1, with=FALSE],
+               tolerance = float_tolerance)
 
   imp2plot <- xgb.plot.importance(importance_matrix = importance.Tree)
   expect_equal(colnames(imp2plot), c("Feature", "Gain", "Cover", "Frequency", "Importance"))
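Throughout these test hunks, exact comparisons (expect_identical, or expect_equal at its default tolerance) are relaxed to tolerance = float_tolerance because xgboost performs much of its arithmetic in single-precision floats. At the time, testthat::expect_equal() delegated numeric comparison to all.equal(), whose tolerance bounds the mean relative difference, so 5e-6 comfortably absorbs float rounding (relative error up to about 1.2e-7) without masking real regressions. A quick illustration (not part of the patch):

    x <- 1e6
    y <- x * (1 + 1e-7)                        # float-sized relative error
    isTRUE(all.equal(x, y))                    # FALSE: default tolerance is ~1.5e-8
    isTRUE(all.equal(x, y, tolerance = 5e-6))  # TRUE : within the relaxed bound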