A fix for CRAN submission of version 0.7-0 (#3061)
* modify test_helper.R
* fix noLD
* update desc
* fix solaris test
* fix desc
* improve fix
* fix url
parent c88bae112e
commit 98be9aef9a
@@ -1,8 +1,8 @@
 Package: xgboost
 Type: Package
 Title: Extreme Gradient Boosting
-Version: 0.6.4.8
-Date: 2017-12-05
+Version: 0.7.0
+Date: 2018-01-22
 Author: Tianqi Chen <tianqi.tchen@gmail.com>, Tong He <hetong007@gmail.com>,
     Michael Benesty <michael@benesty.fr>, Vadim Khotilovich <khotilovich@gmail.com>,
     Yuan Tang <terrytangyuan@gmail.com>
@@ -121,7 +121,7 @@
 #' \itemize{
 #'   \item \code{rmse} root mean square error. \url{http://en.wikipedia.org/wiki/Root_mean_square_error}
 #'   \item \code{logloss} negative log-likelihood. \url{http://en.wikipedia.org/wiki/Log-likelihood}
-#'   \item \code{mlogloss} multiclass logloss. \url{https://www.kaggle.com/wiki/MultiClassLogLoss/}
+#'   \item \code{mlogloss} multiclass logloss. \url{http://wiki.fast.ai/index.php/Log_Loss}
 #'   \item \code{error} Binary classification error rate. It is calculated as \code{(# wrong cases) / (# all cases)}.
 #'         By default, it uses the 0.5 threshold for predicted values to define negative and positive instances.
 #'         Different threshold (e.g., 0.) could be specified as "error@0."
@@ -351,8 +351,8 @@ xgb.train <- function(params = list(), data, nrounds, watchlist = list(),
   if (inherits(xgb_model, 'xgb.Booster') &&
       !is_update &&
       !is.null(xgb_model$evaluation_log) &&
-      all.equal(colnames(evaluation_log),
-                colnames(xgb_model$evaluation_log))) {
+      isTRUE(all.equal(colnames(evaluation_log),
+                       colnames(xgb_model$evaluation_log)))) {
     evaluation_log <- rbindlist(list(xgb_model$evaluation_log, evaluation_log))
   }
   bst$evaluation_log <- evaluation_log
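Why the isTRUE() wrapper matters: all.equal() returns TRUE on a match, but on a mismatch it returns a character vector describing the differences rather than FALSE, so using it bare as a condition inside if()/&& can fail outright under R's checks. A minimal standalone illustration (not part of the patch):

    # all.equal() describes mismatches instead of returning FALSE
    all.equal(c("a", "b"), c("a", "c"))          # "1 string mismatch"
    isTRUE(all.equal(c("a", "b"), c("a", "c")))  # FALSE, safe inside if()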
@@ -179,7 +179,7 @@ The folloiwing is the list of built-in metrics for which Xgboost provides optimi
 \itemize{
   \item \code{rmse} root mean square error. \url{http://en.wikipedia.org/wiki/Root_mean_square_error}
   \item \code{logloss} negative log-likelihood. \url{http://en.wikipedia.org/wiki/Log-likelihood}
-  \item \code{mlogloss} multiclass logloss. \url{https://www.kaggle.com/wiki/MultiClassLogLoss/}
+  \item \code{mlogloss} multiclass logloss. \url{http://wiki.fast.ai/index.php/Log_Loss}
   \item \code{error} Binary classification error rate. It is calculated as \code{(# wrong cases) / (# all cases)}.
         By default, it uses the 0.5 threshold for predicted values to define negative and positive instances.
         Different threshold (e.g., 0.) could be specified as "error@0."
@@ -19,10 +19,10 @@ extern SEXP XGBoosterBoostOneIter_R(SEXP, SEXP, SEXP, SEXP);
 extern SEXP XGBoosterCreate_R(SEXP);
 extern SEXP XGBoosterDumpModel_R(SEXP, SEXP, SEXP, SEXP);
 extern SEXP XGBoosterEvalOneIter_R(SEXP, SEXP, SEXP, SEXP);
-extern SEXP XGBoosterGetAttr_R(SEXP, SEXP);
 extern SEXP XGBoosterGetAttrNames_R(SEXP);
-extern SEXP XGBoosterLoadModel_R(SEXP, SEXP);
+extern SEXP XGBoosterGetAttr_R(SEXP, SEXP);
 extern SEXP XGBoosterLoadModelFromRaw_R(SEXP, SEXP);
+extern SEXP XGBoosterLoadModel_R(SEXP, SEXP);
 extern SEXP XGBoosterModelToRaw_R(SEXP);
 extern SEXP XGBoosterPredict_R(SEXP, SEXP, SEXP, SEXP);
 extern SEXP XGBoosterSaveModel_R(SEXP, SEXP);

@@ -45,10 +45,10 @@ static const R_CallMethodDef CallEntries[] = {
   {"XGBoosterCreate_R", (DL_FUNC) &XGBoosterCreate_R, 1},
   {"XGBoosterDumpModel_R", (DL_FUNC) &XGBoosterDumpModel_R, 4},
   {"XGBoosterEvalOneIter_R", (DL_FUNC) &XGBoosterEvalOneIter_R, 4},
-  {"XGBoosterGetAttr_R", (DL_FUNC) &XGBoosterGetAttr_R, 2},
   {"XGBoosterGetAttrNames_R", (DL_FUNC) &XGBoosterGetAttrNames_R, 1},
-  {"XGBoosterLoadModel_R", (DL_FUNC) &XGBoosterLoadModel_R, 2},
+  {"XGBoosterGetAttr_R", (DL_FUNC) &XGBoosterGetAttr_R, 2},
   {"XGBoosterLoadModelFromRaw_R", (DL_FUNC) &XGBoosterLoadModelFromRaw_R, 2},
+  {"XGBoosterLoadModel_R", (DL_FUNC) &XGBoosterLoadModel_R, 2},
   {"XGBoosterModelToRaw_R", (DL_FUNC) &XGBoosterModelToRaw_R, 1},
   {"XGBoosterPredict_R", (DL_FUNC) &XGBoosterPredict_R, 4},
   {"XGBoosterSaveModel_R", (DL_FUNC) &XGBoosterSaveModel_R, 2},
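The reordered entries above match the C-locale sorting that R's registration skeleton generator emits (for example, XGBoosterGetAttrNames_R sorts before XGBoosterGetAttr_R because 'N' precedes '_' in ASCII). A sketch of how such a table can be regenerated, assuming it is run from the R package root:

    # prints "extern SEXP ..." declarations and the R_CallMethodDef table
    # for the package in the current directory, ready to paste into src/init.c
    tools::package_native_routine_registration_skeleton(".")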
@@ -11,6 +11,7 @@ set.seed(1994)
 # disable some tests for Win32
 windows_flag = .Platform$OS.type == "windows" &&
                .Machine$sizeof.pointer != 8
+solaris_flag = (Sys.info()['sysname'] == "SunOS")

 test_that("train and predict binary classification", {
   nrounds = 2
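Sys.info()['sysname'] reports the kernel name at run time ("SunOS" on the Solaris machines CRAN tests on), so solaris_flag can gate individual expectations the same way windows_flag already does. For comparison, a sketch using testthat's built-in helper (an assumption: a testthat version providing skip_on_os; note it skips the whole test rather than a single expectation):

    test_that("training continuation works", {
      skip_on_os(c("windows", "solaris"))
      # ... expectations that are unreliable on those platforms ...
    })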
@@ -152,20 +153,20 @@ test_that("training continuation works", {
   bst1 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0)
   # continue for two more:
   bst2 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0, xgb_model = bst1)
-  if (!windows_flag)
+  if (!windows_flag && !solaris_flag)
     expect_equal(bst$raw, bst2$raw)
   expect_false(is.null(bst2$evaluation_log))
   expect_equal(dim(bst2$evaluation_log), c(4, 2))
   expect_equal(bst2$evaluation_log, bst$evaluation_log)
   # test continuing from raw model data
   bst2 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0, xgb_model = bst1$raw)
-  if (!windows_flag)
+  if (!windows_flag && !solaris_flag)
     expect_equal(bst$raw, bst2$raw)
   expect_equal(dim(bst2$evaluation_log), c(2, 2))
   # test continuing from a model in file
   xgb.save(bst1, "xgboost.model")
   bst2 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0, xgb_model = "xgboost.model")
-  if (!windows_flag)
+  if (!windows_flag && !solaris_flag)
     expect_equal(bst$raw, bst2$raw)
   expect_equal(dim(bst2$evaluation_log), c(2, 2))
 })
@@ -5,6 +5,8 @@ require(data.table)
 require(Matrix)
 require(vcd, quietly = TRUE)

+float_tolerance = 5e-6
+
 set.seed(1982)
 data(Arthritis)
 df <- data.table(Arthritis, keep.rownames = F)
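The 5e-6 constant is far above double-precision noise (testthat's default tolerance is about 1.5e-8) but in line with single-precision storage: xgboost keeps model values as 32-bit floats, whose machine epsilon is roughly 1.2e-7. A quick standalone sanity check of the chosen constant (illustrative, not part of the patch):

    float_eps <- 2^-23                                    # ~1.19e-07
    all.equal(1, 1 + 10 * float_eps, tolerance = 5e-6)    # TRUE
    all.equal(1, 1 + 10 * float_eps, tolerance = 1.5e-8)  # reports a difference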
@@ -85,7 +87,8 @@ test_that("predict feature contributions works", {
   X <- sparse_matrix
   colnames(X) <- NULL
   expect_error(pred_contr_ <- predict(bst.Tree, X, predcontrib = TRUE), regexp = NA)
-  expect_equal(pred_contr, pred_contr_, check.attributes = FALSE)
+  expect_equal(pred_contr, pred_contr_, check.attributes = FALSE,
+               tolerance = float_tolerance)

   # gbtree binary classifier (approximate method)
   expect_error(pred_contr <- predict(bst.Tree, sparse_matrix, predcontrib = TRUE, approxcontrib = TRUE), regexp = NA)
@@ -104,7 +107,8 @@ test_that("predict feature contributions works", {
   coefs <- xgb.dump(bst.GLM)[-c(1,2,4)] %>% as.numeric
   coefs <- c(coefs[-1], coefs[1]) # intercept must be the last
   pred_contr_manual <- sweep(cbind(sparse_matrix, 1), 2, coefs, FUN="*")
-  expect_equal(as.numeric(pred_contr), as.numeric(pred_contr_manual), 1e-5)
+  expect_equal(as.numeric(pred_contr), as.numeric(pred_contr_manual),
+               tolerance = float_tolerance)

   # gbtree multiclass
   pred <- predict(mbst.Tree, as.matrix(iris[, -5]), outputmargin = TRUE, reshape = TRUE)
@@ -123,11 +127,12 @@ test_that("predict feature contributions works", {
   coefs_all <- xgb.dump(mbst.GLM)[-c(1,2,6)] %>% as.numeric %>% matrix(ncol = 3, byrow = TRUE)
   for (g in seq_along(pred_contr)) {
     expect_equal(colnames(pred_contr[[g]]), c(colnames(iris[, -5]), "BIAS"))
-    expect_lt(max(abs(rowSums(pred_contr[[g]]) - pred[, g])), 2e-6)
+    expect_lt(max(abs(rowSums(pred_contr[[g]]) - pred[, g])), float_tolerance)
     # manual calculation of linear terms
     coefs <- c(coefs_all[-1, g], coefs_all[1, g]) # intercept needs to be the last
     pred_contr_manual <- sweep(as.matrix(cbind(iris[,-5], 1)), 2, coefs, FUN="*")
-    expect_equal(as.numeric(pred_contr[[g]]), as.numeric(pred_contr_manual), 2e-6)
+    expect_equal(as.numeric(pred_contr[[g]]), as.numeric(pred_contr_manual),
+                 tolerance = float_tolerance)
   }
 })
@@ -171,14 +176,16 @@ if (grepl('Windows', Sys.info()[['sysname']]) ||
   # check that lossless conversion works with 17 digits
   # numeric -> character -> numeric
   X <- 10^runif(100, -20, 20)
-  X2X <- as.numeric(format(X, digits = 17))
-  expect_identical(X, X2X)
+  if (capabilities('long.double')) {
+    X2X <- as.numeric(format(X, digits = 17))
+    expect_identical(X, X2X)
+  }
   # retrieved attributes to be the same as written
   for (x in X) {
     xgb.attr(bst.Tree, "x") <- x
-    expect_identical(as.numeric(xgb.attr(bst.Tree, "x")), x)
+    expect_equal(as.numeric(xgb.attr(bst.Tree, "x")), x, tolerance = float_tolerance)
     xgb.attributes(bst.Tree) <- list(a = "A", b = x)
-    expect_identical(as.numeric(xgb.attr(bst.Tree, "b")), x)
+    expect_equal(as.numeric(xgb.attr(bst.Tree, "b")), x, tolerance = float_tolerance)
   }
 })
 }
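The capabilities('long.double') guard is the "fix noLD" item from the commit message: on CRAN's noLD test setup (R compiled with --disable-long-double), intermediate arithmetic lacks extended precision, and a 17-digit decimal round-trip of a double is no longer guaranteed to be bit-exact, so the lossless-conversion check only runs where long doubles are available. A standalone sketch of the same guard:

    if (capabilities('long.double')) {
      x <- pi
      # exact round-trip through 17 significant decimal digits
      stopifnot(identical(x, as.numeric(format(x, digits = 17))))
    }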
@@ -187,7 +194,7 @@ test_that("xgb.Booster serializing as R object works", {
   saveRDS(bst.Tree, 'xgb.model.rds')
   bst <- readRDS('xgb.model.rds')
   dtrain <- xgb.DMatrix(sparse_matrix, label = label)
-  expect_equal(predict(bst.Tree, dtrain), predict(bst, dtrain))
+  expect_equal(predict(bst.Tree, dtrain), predict(bst, dtrain), tolerance = float_tolerance)
   expect_equal(xgb.dump(bst.Tree), xgb.dump(bst))
   xgb.save(bst, 'xgb.model')
   nil_ptr <- new("externalptr")
@@ -195,7 +202,7 @@ test_that("xgb.Booster serializing as R object works", {
   expect_true(identical(bst$handle, nil_ptr))
   bst <- xgb.Booster.complete(bst)
   expect_true(!identical(bst$handle, nil_ptr))
-  expect_equal(predict(bst.Tree, dtrain), predict(bst, dtrain))
+  expect_equal(predict(bst.Tree, dtrain), predict(bst, dtrain), tolerance = float_tolerance)
 })

 test_that("xgb.model.dt.tree works with and without feature names", {
@@ -233,13 +240,14 @@ test_that("xgb.importance works with and without feature names", {
   expect_output(str(importance.Tree), 'Feature.*\\"Age\\"')

   importance.Tree.0 <- xgb.importance(model = bst.Tree)
-  expect_equal(importance.Tree, importance.Tree.0)
+  expect_equal(importance.Tree, importance.Tree.0, tolerance = float_tolerance)

   # when model contains no feature names:
   bst.Tree.x <- bst.Tree
   bst.Tree.x$feature_names <- NULL
   importance.Tree.x <- xgb.importance(model = bst.Tree)
-  expect_equal(importance.Tree[, -1, with=FALSE], importance.Tree.x[, -1, with=FALSE])
+  expect_equal(importance.Tree[, -1, with=FALSE], importance.Tree.x[, -1, with=FALSE],
+               tolerance = float_tolerance)

   imp2plot <- xgb.plot.importance(importance_matrix = importance.Tree)
   expect_equal(colnames(imp2plot), c("Feature", "Gain", "Cover", "Frequency", "Importance"))