A fix for CRAN submission of version 0.7-0 (#3061)

* modify test_helper.R

* fix noLD

* update desc

* fix solaris test

* fix desc

* improve fix

* fix url
Authored by Tong He on 2018-01-27 17:06:28 -08:00; committed by GitHub
parent c88bae112e
commit 98be9aef9a
6 changed files with 34 additions and 25 deletions

@@ -1,8 +1,8 @@
 Package: xgboost
 Type: Package
 Title: Extreme Gradient Boosting
-Version: 0.6.4.8
-Date: 2017-12-05
+Version: 0.7.0
+Date: 2018-01-22
 Author: Tianqi Chen <tianqi.tchen@gmail.com>, Tong He <hetong007@gmail.com>,
     Michael Benesty <michael@benesty.fr>, Vadim Khotilovich <khotilovich@gmail.com>,
     Yuan Tang <terrytangyuan@gmail.com>
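The DESCRIPTION bump is what CRAN keys the submission on. Once a package built from this tree is installed, the metadata can be sanity-checked from any R session (plain base R; nothing here is specific to this PR):

    packageVersion("xgboost")                       # expected: '0.7.0'
    packageDescription("xgboost", fields = "Date")  # expected: "2018-01-22"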

@@ -121,7 +121,7 @@
 #' \itemize{
 #' \item \code{rmse} root mean square error. \url{http://en.wikipedia.org/wiki/Root_mean_square_error}
 #' \item \code{logloss} negative log-likelihood. \url{http://en.wikipedia.org/wiki/Log-likelihood}
-#' \item \code{mlogloss} multiclass logloss. \url{https://www.kaggle.com/wiki/MultiClassLogLoss/}
+#' \item \code{mlogloss} multiclass logloss. \url{http://wiki.fast.ai/index.php/Log_Loss}
 #' \item \code{error} Binary classification error rate. It is calculated as \code{(# wrong cases) / (# all cases)}.
 #' By default, it uses the 0.5 threshold for predicted values to define negative and positive instances.
 #' Different threshold (e.g., 0.) could be specified as "error@0."
@@ -351,8 +351,8 @@ xgb.train <- function(params = list(), data, nrounds, watchlist = list(),
   if (inherits(xgb_model, 'xgb.Booster') &&
       !is_update &&
       !is.null(xgb_model$evaluation_log) &&
-      all.equal(colnames(evaluation_log),
-                colnames(xgb_model$evaluation_log))) {
+      isTRUE(all.equal(colnames(evaluation_log),
+                       colnames(xgb_model$evaluation_log)))) {
     evaluation_log <- rbindlist(list(xgb_model$evaluation_log, evaluation_log))
   }
   bst$evaluation_log <- evaluation_log
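This second hunk is the substance of the "improve fix" commit message above. When its arguments differ, all.equal() returns a character vector describing the differences rather than FALSE, so using it bare inside an if() condition errors as soon as the column names actually diverge; isTRUE() collapses the result to a single logical. A minimal sketch with made-up evaluation-log column names:

    a <- c("iter", "train_rmse")
    b <- c("iter", "train_logloss")
    all.equal(a, b)           # "1 string mismatch" -- a character vector, not FALSE
    isTRUE(all.equal(a, b))   # FALSE, safe to combine with && inside if ()
    isTRUE(all.equal(a, a))   # TRUE when the logs really share column names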

@@ -179,7 +179,7 @@ The folloiwing is the list of built-in metrics for which Xgboost provides optimi
 \itemize{
 \item \code{rmse} root mean square error. \url{http://en.wikipedia.org/wiki/Root_mean_square_error}
 \item \code{logloss} negative log-likelihood. \url{http://en.wikipedia.org/wiki/Log-likelihood}
-\item \code{mlogloss} multiclass logloss. \url{https://www.kaggle.com/wiki/MultiClassLogLoss/}
+\item \code{mlogloss} multiclass logloss. \url{http://wiki.fast.ai/index.php/Log_Loss}
 \item \code{error} Binary classification error rate. It is calculated as \code{(# wrong cases) / (# all cases)}.
 By default, it uses the 0.5 threshold for predicted values to define negative and positive instances.
 Different threshold (e.g., 0.) could be specified as "error@0."
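Both the roxygen source and the generated Rd now point the mlogloss entry at a live reference; the metric itself is unchanged. For readers without the link, a small self-contained illustration of multiclass logloss in plain R (for exposition only, not xgboost's internal implementation):

    # prob: n x k matrix of predicted class probabilities (rows sum to 1)
    # y:    integer class labels in 1..k
    mlogloss <- function(prob, y, eps = 1e-15) {
      p <- prob[cbind(seq_along(y), y)]         # probability assigned to the true class
      -mean(log(pmin(pmax(p, eps), 1 - eps)))   # clamp to avoid log(0)
    }

    prob <- matrix(c(0.7, 0.2, 0.1,
                     0.1, 0.8, 0.1), nrow = 2, byrow = TRUE)
    mlogloss(prob, c(1, 2))   # roughly 0.29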

@@ -19,10 +19,10 @@ extern SEXP XGBoosterBoostOneIter_R(SEXP, SEXP, SEXP, SEXP);
 extern SEXP XGBoosterCreate_R(SEXP);
 extern SEXP XGBoosterDumpModel_R(SEXP, SEXP, SEXP, SEXP);
 extern SEXP XGBoosterEvalOneIter_R(SEXP, SEXP, SEXP, SEXP);
-extern SEXP XGBoosterGetAttr_R(SEXP, SEXP);
 extern SEXP XGBoosterGetAttrNames_R(SEXP);
-extern SEXP XGBoosterLoadModel_R(SEXP, SEXP);
+extern SEXP XGBoosterGetAttr_R(SEXP, SEXP);
 extern SEXP XGBoosterLoadModelFromRaw_R(SEXP, SEXP);
+extern SEXP XGBoosterLoadModel_R(SEXP, SEXP);
 extern SEXP XGBoosterModelToRaw_R(SEXP);
 extern SEXP XGBoosterPredict_R(SEXP, SEXP, SEXP, SEXP);
 extern SEXP XGBoosterSaveModel_R(SEXP, SEXP);
@@ -45,10 +45,10 @@ static const R_CallMethodDef CallEntries[] = {
   {"XGBoosterCreate_R", (DL_FUNC) &XGBoosterCreate_R, 1},
   {"XGBoosterDumpModel_R", (DL_FUNC) &XGBoosterDumpModel_R, 4},
   {"XGBoosterEvalOneIter_R", (DL_FUNC) &XGBoosterEvalOneIter_R, 4},
-  {"XGBoosterGetAttr_R", (DL_FUNC) &XGBoosterGetAttr_R, 2},
   {"XGBoosterGetAttrNames_R", (DL_FUNC) &XGBoosterGetAttrNames_R, 1},
-  {"XGBoosterLoadModel_R", (DL_FUNC) &XGBoosterLoadModel_R, 2},
+  {"XGBoosterGetAttr_R", (DL_FUNC) &XGBoosterGetAttr_R, 2},
   {"XGBoosterLoadModelFromRaw_R", (DL_FUNC) &XGBoosterLoadModelFromRaw_R, 2},
+  {"XGBoosterLoadModel_R", (DL_FUNC) &XGBoosterLoadModel_R, 2},
   {"XGBoosterModelToRaw_R", (DL_FUNC) &XGBoosterModelToRaw_R, 1},
   {"XGBoosterPredict_R", (DL_FUNC) &XGBoosterPredict_R, 4},
   {"XGBoosterSaveModel_R", (DL_FUNC) &XGBoosterSaveModel_R, 2},

@@ -11,6 +11,7 @@ set.seed(1994)
 # disable some tests for Win32
 windows_flag = .Platform$OS.type == "windows" &&
                .Machine$sizeof.pointer != 8
+solaris_flag = (Sys.info()['sysname'] == "SunOS")
 
 test_that("train and predict binary classification", {
   nrounds = 2
@@ -152,20 +153,20 @@ test_that("training continuation works", {
   bst1 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0)
   # continue for two more:
   bst2 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0, xgb_model = bst1)
-  if (!windows_flag)
+  if (!windows_flag && !solaris_flag)
     expect_equal(bst$raw, bst2$raw)
   expect_false(is.null(bst2$evaluation_log))
   expect_equal(dim(bst2$evaluation_log), c(4, 2))
   expect_equal(bst2$evaluation_log, bst$evaluation_log)
   # test continuing from raw model data
   bst2 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0, xgb_model = bst1$raw)
-  if (!windows_flag)
+  if (!windows_flag && !solaris_flag)
     expect_equal(bst$raw, bst2$raw)
   expect_equal(dim(bst2$evaluation_log), c(2, 2))
   # test continuing from a model in file
   xgb.save(bst1, "xgboost.model")
   bst2 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0, xgb_model = "xgboost.model")
-  if (!windows_flag)
+  if (!windows_flag && !solaris_flag)
     expect_equal(bst$raw, bst2$raw)
   expect_equal(dim(bst2$evaluation_log), c(2, 2))
 })
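The pattern in the hunk above: only the bit-exact comparison of the serialized boosters is gated behind the platform flags, presumably because continued training yields numerically equivalent but not byte-identical raw dumps on 32-bit Windows and on Solaris; the evaluation-log assertions still run everywhere. The flags themselves are ordinary base-R probes, sketched below together with a hypothetical testthat-style skip that would express the same intent:

    .Platform$OS.type          # "windows" or "unix"
    .Machine$sizeof.pointer    # 4 on 32-bit builds, 8 on 64-bit
    Sys.info()['sysname']      # "SunOS" on Solaris

    # Illustration only -- not what the test file does:
    # skip_if(windows_flag || solaris_flag, "raw booster bytes differ on this platform")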

@@ -5,6 +5,8 @@ require(data.table)
 require(Matrix)
 require(vcd, quietly = TRUE)
 
+float_tolerance = 5e-6
+
 set.seed(1982)
 data(Arthritis)
 df <- data.table(Arthritis, keep.rownames = F)
@@ -85,7 +87,8 @@ test_that("predict feature contributions works", {
   X <- sparse_matrix
   colnames(X) <- NULL
   expect_error(pred_contr_ <- predict(bst.Tree, X, predcontrib = TRUE), regexp = NA)
-  expect_equal(pred_contr, pred_contr_, check.attributes = FALSE)
+  expect_equal(pred_contr, pred_contr_, check.attributes = FALSE,
+               tolerance = float_tolerance)
 
   # gbtree binary classifier (approximate method)
   expect_error(pred_contr <- predict(bst.Tree, sparse_matrix, predcontrib = TRUE, approxcontrib = TRUE), regexp = NA)
@@ -104,7 +107,8 @@ test_that("predict feature contributions works", {
   coefs <- xgb.dump(bst.GLM)[-c(1,2,4)] %>% as.numeric
   coefs <- c(coefs[-1], coefs[1]) # intercept must be the last
   pred_contr_manual <- sweep(cbind(sparse_matrix, 1), 2, coefs, FUN="*")
-  expect_equal(as.numeric(pred_contr), as.numeric(pred_contr_manual), 1e-5)
+  expect_equal(as.numeric(pred_contr), as.numeric(pred_contr_manual),
+               tolerance = float_tolerance)
 
   # gbtree multiclass
   pred <- predict(mbst.Tree, as.matrix(iris[, -5]), outputmargin = TRUE, reshape = TRUE)
@@ -123,11 +127,12 @@ test_that("predict feature contributions works", {
   coefs_all <- xgb.dump(mbst.GLM)[-c(1,2,6)] %>% as.numeric %>% matrix(ncol = 3, byrow = TRUE)
   for (g in seq_along(pred_contr)) {
     expect_equal(colnames(pred_contr[[g]]), c(colnames(iris[, -5]), "BIAS"))
-    expect_lt(max(abs(rowSums(pred_contr[[g]]) - pred[, g])), 2e-6)
+    expect_lt(max(abs(rowSums(pred_contr[[g]]) - pred[, g])), float_tolerance)
     # manual calculation of linear terms
     coefs <- c(coefs_all[-1, g], coefs_all[1, g]) # intercept needs to be the last
     pred_contr_manual <- sweep(as.matrix(cbind(iris[,-5], 1)), 2, coefs, FUN="*")
-    expect_equal(as.numeric(pred_contr[[g]]), as.numeric(pred_contr_manual), 2e-6)
+    expect_equal(as.numeric(pred_contr[[g]]), as.numeric(pred_contr_manual),
+                 tolerance = float_tolerance)
   }
 })
@@ -171,14 +176,16 @@ if (grepl('Windows', Sys.info()[['sysname']]) ||
   # check that lossless conversion works with 17 digits
   # numeric -> character -> numeric
   X <- 10^runif(100, -20, 20)
+  if (capabilities('long.double')) {
     X2X <- as.numeric(format(X, digits = 17))
     expect_identical(X, X2X)
+  }
   # retrieved attributes to be the same as written
   for (x in X) {
     xgb.attr(bst.Tree, "x") <- x
-    expect_identical(as.numeric(xgb.attr(bst.Tree, "x")), x)
+    expect_equal(as.numeric(xgb.attr(bst.Tree, "x")), x, tolerance = float_tolerance)
     xgb.attributes(bst.Tree) <- list(a = "A", b = x)
-    expect_identical(as.numeric(xgb.attr(bst.Tree, "b")), x)
+    expect_equal(as.numeric(xgb.attr(bst.Tree, "b")), x, tolerance = float_tolerance)
   }
 })
 }
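The capabilities('long.double') guard is the "fix noLD" part of this PR: CRAN also checks packages on an R built with --disable-long-double, and on such a build the 17-significant-digit numeric -> character -> numeric round trip is not guaranteed to be exact, so expect_identical() could fail for reasons unrelated to xgboost. The guarded check, stated on its own with a single random value (illustrative):

    capabilities('long.double')   # FALSE on CRAN's noLD test setup

    x  <- 10^runif(1, -20, 20)
    x2 <- as.numeric(format(x, digits = 17))
    identical(x, x2)              # TRUE with long doubles; may be FALSE without them
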
@@ -187,7 +194,7 @@ test_that("xgb.Booster serializing as R object works", {
   saveRDS(bst.Tree, 'xgb.model.rds')
   bst <- readRDS('xgb.model.rds')
   dtrain <- xgb.DMatrix(sparse_matrix, label = label)
-  expect_equal(predict(bst.Tree, dtrain), predict(bst, dtrain))
+  expect_equal(predict(bst.Tree, dtrain), predict(bst, dtrain), tolerance = float_tolerance)
   expect_equal(xgb.dump(bst.Tree), xgb.dump(bst))
   xgb.save(bst, 'xgb.model')
   nil_ptr <- new("externalptr")
@@ -195,7 +202,7 @@ test_that("xgb.Booster serializing as R object works", {
   expect_true(identical(bst$handle, nil_ptr))
   bst <- xgb.Booster.complete(bst)
   expect_true(!identical(bst$handle, nil_ptr))
-  expect_equal(predict(bst.Tree, dtrain), predict(bst, dtrain))
+  expect_equal(predict(bst.Tree, dtrain), predict(bst, dtrain), tolerance = float_tolerance)
 })
 
 test_that("xgb.model.dt.tree works with and without feature names", {
@@ -233,13 +240,14 @@ test_that("xgb.importance works with and without feature names", {
 
   expect_output(str(importance.Tree), 'Feature.*\\"Age\\"')
   importance.Tree.0 <- xgb.importance(model = bst.Tree)
-  expect_equal(importance.Tree, importance.Tree.0)
+  expect_equal(importance.Tree, importance.Tree.0, tolerance = float_tolerance)
 
   # when model contains no feature names:
   bst.Tree.x <- bst.Tree
   bst.Tree.x$feature_names <- NULL
   importance.Tree.x <- xgb.importance(model = bst.Tree)
-  expect_equal(importance.Tree[, -1, with=FALSE], importance.Tree.x[, -1, with=FALSE])
+  expect_equal(importance.Tree[, -1, with=FALSE], importance.Tree.x[, -1, with=FALSE],
+               tolerance = float_tolerance)
 
   imp2plot <- xgb.plot.importance(importance_matrix = importance.Tree)
   expect_equal(colnames(imp2plot), c("Feature", "Gain", "Cover", "Frequency", "Importance"))