A fix for CRAN submission of version 0.7-0 (#3061)
* modify test_helper.R * fix noLD * update desc * fix solaris test * fix desc * improve fix * fix url
This commit is contained in:
parent
c88bae112e
commit
98be9aef9a
@ -1,8 +1,8 @@
|
|||||||
Package: xgboost
|
Package: xgboost
|
||||||
Type: Package
|
Type: Package
|
||||||
Title: Extreme Gradient Boosting
|
Title: Extreme Gradient Boosting
|
||||||
Version: 0.6.4.8
|
Version: 0.7.0
|
||||||
Date: 2017-12-05
|
Date: 2018-01-22
|
||||||
Author: Tianqi Chen <tianqi.tchen@gmail.com>, Tong He <hetong007@gmail.com>,
|
Author: Tianqi Chen <tianqi.tchen@gmail.com>, Tong He <hetong007@gmail.com>,
|
||||||
Michael Benesty <michael@benesty.fr>, Vadim Khotilovich <khotilovich@gmail.com>,
|
Michael Benesty <michael@benesty.fr>, Vadim Khotilovich <khotilovich@gmail.com>,
|
||||||
Yuan Tang <terrytangyuan@gmail.com>
|
Yuan Tang <terrytangyuan@gmail.com>
|
||||||
|
|||||||
@ -121,7 +121,7 @@
|
|||||||
#' \itemize{
|
#' \itemize{
|
||||||
#' \item \code{rmse} root mean square error. \url{http://en.wikipedia.org/wiki/Root_mean_square_error}
|
#' \item \code{rmse} root mean square error. \url{http://en.wikipedia.org/wiki/Root_mean_square_error}
|
||||||
#' \item \code{logloss} negative log-likelihood. \url{http://en.wikipedia.org/wiki/Log-likelihood}
|
#' \item \code{logloss} negative log-likelihood. \url{http://en.wikipedia.org/wiki/Log-likelihood}
|
||||||
#' \item \code{mlogloss} multiclass logloss. \url{https://www.kaggle.com/wiki/MultiClassLogLoss/}
|
#' \item \code{mlogloss} multiclass logloss. \url{http://wiki.fast.ai/index.php/Log_Loss}
|
||||||
#' \item \code{error} Binary classification error rate. It is calculated as \code{(# wrong cases) / (# all cases)}.
|
#' \item \code{error} Binary classification error rate. It is calculated as \code{(# wrong cases) / (# all cases)}.
|
||||||
#' By default, it uses the 0.5 threshold for predicted values to define negative and positive instances.
|
#' By default, it uses the 0.5 threshold for predicted values to define negative and positive instances.
|
||||||
#' Different threshold (e.g., 0.) could be specified as "error@0."
|
#' Different threshold (e.g., 0.) could be specified as "error@0."
|
||||||
@ -351,8 +351,8 @@ xgb.train <- function(params = list(), data, nrounds, watchlist = list(),
|
|||||||
if (inherits(xgb_model, 'xgb.Booster') &&
|
if (inherits(xgb_model, 'xgb.Booster') &&
|
||||||
!is_update &&
|
!is_update &&
|
||||||
!is.null(xgb_model$evaluation_log) &&
|
!is.null(xgb_model$evaluation_log) &&
|
||||||
all.equal(colnames(evaluation_log),
|
isTRUE(all.equal(colnames(evaluation_log),
|
||||||
colnames(xgb_model$evaluation_log))) {
|
colnames(xgb_model$evaluation_log)))) {
|
||||||
evaluation_log <- rbindlist(list(xgb_model$evaluation_log, evaluation_log))
|
evaluation_log <- rbindlist(list(xgb_model$evaluation_log, evaluation_log))
|
||||||
}
|
}
|
||||||
bst$evaluation_log <- evaluation_log
|
bst$evaluation_log <- evaluation_log
|
||||||
|
|||||||
@ -179,7 +179,7 @@ The following is the list of built-in metrics for which Xgboost provides optimi
|
|||||||
\itemize{
|
\itemize{
|
||||||
\item \code{rmse} root mean square error. \url{http://en.wikipedia.org/wiki/Root_mean_square_error}
|
\item \code{rmse} root mean square error. \url{http://en.wikipedia.org/wiki/Root_mean_square_error}
|
||||||
\item \code{logloss} negative log-likelihood. \url{http://en.wikipedia.org/wiki/Log-likelihood}
|
\item \code{logloss} negative log-likelihood. \url{http://en.wikipedia.org/wiki/Log-likelihood}
|
||||||
\item \code{mlogloss} multiclass logloss. \url{https://www.kaggle.com/wiki/MultiClassLogLoss/}
|
\item \code{mlogloss} multiclass logloss. \url{http://wiki.fast.ai/index.php/Log_Loss}
|
||||||
\item \code{error} Binary classification error rate. It is calculated as \code{(# wrong cases) / (# all cases)}.
|
\item \code{error} Binary classification error rate. It is calculated as \code{(# wrong cases) / (# all cases)}.
|
||||||
By default, it uses the 0.5 threshold for predicted values to define negative and positive instances.
|
By default, it uses the 0.5 threshold for predicted values to define negative and positive instances.
|
||||||
Different threshold (e.g., 0.) could be specified as "error@0."
|
Different threshold (e.g., 0.) could be specified as "error@0."
|
||||||
|
|||||||
@ -19,10 +19,10 @@ extern SEXP XGBoosterBoostOneIter_R(SEXP, SEXP, SEXP, SEXP);
|
|||||||
extern SEXP XGBoosterCreate_R(SEXP);
|
extern SEXP XGBoosterCreate_R(SEXP);
|
||||||
extern SEXP XGBoosterDumpModel_R(SEXP, SEXP, SEXP, SEXP);
|
extern SEXP XGBoosterDumpModel_R(SEXP, SEXP, SEXP, SEXP);
|
||||||
extern SEXP XGBoosterEvalOneIter_R(SEXP, SEXP, SEXP, SEXP);
|
extern SEXP XGBoosterEvalOneIter_R(SEXP, SEXP, SEXP, SEXP);
|
||||||
extern SEXP XGBoosterGetAttr_R(SEXP, SEXP);
|
|
||||||
extern SEXP XGBoosterGetAttrNames_R(SEXP);
|
extern SEXP XGBoosterGetAttrNames_R(SEXP);
|
||||||
extern SEXP XGBoosterLoadModel_R(SEXP, SEXP);
|
extern SEXP XGBoosterGetAttr_R(SEXP, SEXP);
|
||||||
extern SEXP XGBoosterLoadModelFromRaw_R(SEXP, SEXP);
|
extern SEXP XGBoosterLoadModelFromRaw_R(SEXP, SEXP);
|
||||||
|
extern SEXP XGBoosterLoadModel_R(SEXP, SEXP);
|
||||||
extern SEXP XGBoosterModelToRaw_R(SEXP);
|
extern SEXP XGBoosterModelToRaw_R(SEXP);
|
||||||
extern SEXP XGBoosterPredict_R(SEXP, SEXP, SEXP, SEXP);
|
extern SEXP XGBoosterPredict_R(SEXP, SEXP, SEXP, SEXP);
|
||||||
extern SEXP XGBoosterSaveModel_R(SEXP, SEXP);
|
extern SEXP XGBoosterSaveModel_R(SEXP, SEXP);
|
||||||
@ -45,10 +45,10 @@ static const R_CallMethodDef CallEntries[] = {
|
|||||||
{"XGBoosterCreate_R", (DL_FUNC) &XGBoosterCreate_R, 1},
|
{"XGBoosterCreate_R", (DL_FUNC) &XGBoosterCreate_R, 1},
|
||||||
{"XGBoosterDumpModel_R", (DL_FUNC) &XGBoosterDumpModel_R, 4},
|
{"XGBoosterDumpModel_R", (DL_FUNC) &XGBoosterDumpModel_R, 4},
|
||||||
{"XGBoosterEvalOneIter_R", (DL_FUNC) &XGBoosterEvalOneIter_R, 4},
|
{"XGBoosterEvalOneIter_R", (DL_FUNC) &XGBoosterEvalOneIter_R, 4},
|
||||||
{"XGBoosterGetAttr_R", (DL_FUNC) &XGBoosterGetAttr_R, 2},
|
|
||||||
{"XGBoosterGetAttrNames_R", (DL_FUNC) &XGBoosterGetAttrNames_R, 1},
|
{"XGBoosterGetAttrNames_R", (DL_FUNC) &XGBoosterGetAttrNames_R, 1},
|
||||||
{"XGBoosterLoadModel_R", (DL_FUNC) &XGBoosterLoadModel_R, 2},
|
{"XGBoosterGetAttr_R", (DL_FUNC) &XGBoosterGetAttr_R, 2},
|
||||||
{"XGBoosterLoadModelFromRaw_R", (DL_FUNC) &XGBoosterLoadModelFromRaw_R, 2},
|
{"XGBoosterLoadModelFromRaw_R", (DL_FUNC) &XGBoosterLoadModelFromRaw_R, 2},
|
||||||
|
{"XGBoosterLoadModel_R", (DL_FUNC) &XGBoosterLoadModel_R, 2},
|
||||||
{"XGBoosterModelToRaw_R", (DL_FUNC) &XGBoosterModelToRaw_R, 1},
|
{"XGBoosterModelToRaw_R", (DL_FUNC) &XGBoosterModelToRaw_R, 1},
|
||||||
{"XGBoosterPredict_R", (DL_FUNC) &XGBoosterPredict_R, 4},
|
{"XGBoosterPredict_R", (DL_FUNC) &XGBoosterPredict_R, 4},
|
||||||
{"XGBoosterSaveModel_R", (DL_FUNC) &XGBoosterSaveModel_R, 2},
|
{"XGBoosterSaveModel_R", (DL_FUNC) &XGBoosterSaveModel_R, 2},
|
||||||
|
|||||||
@ -11,6 +11,7 @@ set.seed(1994)
|
|||||||
# disable some tests for Win32
|
# disable some tests for Win32
|
||||||
windows_flag = .Platform$OS.type == "windows" &&
|
windows_flag = .Platform$OS.type == "windows" &&
|
||||||
.Machine$sizeof.pointer != 8
|
.Machine$sizeof.pointer != 8
|
||||||
|
solaris_flag = (Sys.info()['sysname'] == "SunOS")
|
||||||
|
|
||||||
test_that("train and predict binary classification", {
|
test_that("train and predict binary classification", {
|
||||||
nrounds = 2
|
nrounds = 2
|
||||||
@ -152,20 +153,20 @@ test_that("training continuation works", {
|
|||||||
bst1 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0)
|
bst1 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0)
|
||||||
# continue for two more:
|
# continue for two more:
|
||||||
bst2 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0, xgb_model = bst1)
|
bst2 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0, xgb_model = bst1)
|
||||||
if (!windows_flag)
|
if (!windows_flag && !solaris_flag)
|
||||||
expect_equal(bst$raw, bst2$raw)
|
expect_equal(bst$raw, bst2$raw)
|
||||||
expect_false(is.null(bst2$evaluation_log))
|
expect_false(is.null(bst2$evaluation_log))
|
||||||
expect_equal(dim(bst2$evaluation_log), c(4, 2))
|
expect_equal(dim(bst2$evaluation_log), c(4, 2))
|
||||||
expect_equal(bst2$evaluation_log, bst$evaluation_log)
|
expect_equal(bst2$evaluation_log, bst$evaluation_log)
|
||||||
# test continuing from raw model data
|
# test continuing from raw model data
|
||||||
bst2 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0, xgb_model = bst1$raw)
|
bst2 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0, xgb_model = bst1$raw)
|
||||||
if (!windows_flag)
|
if (!windows_flag && !solaris_flag)
|
||||||
expect_equal(bst$raw, bst2$raw)
|
expect_equal(bst$raw, bst2$raw)
|
||||||
expect_equal(dim(bst2$evaluation_log), c(2, 2))
|
expect_equal(dim(bst2$evaluation_log), c(2, 2))
|
||||||
# test continuing from a model in file
|
# test continuing from a model in file
|
||||||
xgb.save(bst1, "xgboost.model")
|
xgb.save(bst1, "xgboost.model")
|
||||||
bst2 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0, xgb_model = "xgboost.model")
|
bst2 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0, xgb_model = "xgboost.model")
|
||||||
if (!windows_flag)
|
if (!windows_flag && !solaris_flag)
|
||||||
expect_equal(bst$raw, bst2$raw)
|
expect_equal(bst$raw, bst2$raw)
|
||||||
expect_equal(dim(bst2$evaluation_log), c(2, 2))
|
expect_equal(dim(bst2$evaluation_log), c(2, 2))
|
||||||
})
|
})
|
||||||
|
|||||||
@ -5,6 +5,8 @@ require(data.table)
|
|||||||
require(Matrix)
|
require(Matrix)
|
||||||
require(vcd, quietly = TRUE)
|
require(vcd, quietly = TRUE)
|
||||||
|
|
||||||
|
float_tolerance = 5e-6
|
||||||
|
|
||||||
set.seed(1982)
|
set.seed(1982)
|
||||||
data(Arthritis)
|
data(Arthritis)
|
||||||
df <- data.table(Arthritis, keep.rownames = F)
|
df <- data.table(Arthritis, keep.rownames = F)
|
||||||
@ -85,7 +87,8 @@ test_that("predict feature contributions works", {
|
|||||||
X <- sparse_matrix
|
X <- sparse_matrix
|
||||||
colnames(X) <- NULL
|
colnames(X) <- NULL
|
||||||
expect_error(pred_contr_ <- predict(bst.Tree, X, predcontrib = TRUE), regexp = NA)
|
expect_error(pred_contr_ <- predict(bst.Tree, X, predcontrib = TRUE), regexp = NA)
|
||||||
expect_equal(pred_contr, pred_contr_, check.attributes = FALSE)
|
expect_equal(pred_contr, pred_contr_, check.attributes = FALSE,
|
||||||
|
tolerance = float_tolerance)
|
||||||
|
|
||||||
# gbtree binary classifier (approximate method)
|
# gbtree binary classifier (approximate method)
|
||||||
expect_error(pred_contr <- predict(bst.Tree, sparse_matrix, predcontrib = TRUE, approxcontrib = TRUE), regexp = NA)
|
expect_error(pred_contr <- predict(bst.Tree, sparse_matrix, predcontrib = TRUE, approxcontrib = TRUE), regexp = NA)
|
||||||
@ -104,7 +107,8 @@ test_that("predict feature contributions works", {
|
|||||||
coefs <- xgb.dump(bst.GLM)[-c(1,2,4)] %>% as.numeric
|
coefs <- xgb.dump(bst.GLM)[-c(1,2,4)] %>% as.numeric
|
||||||
coefs <- c(coefs[-1], coefs[1]) # intercept must be the last
|
coefs <- c(coefs[-1], coefs[1]) # intercept must be the last
|
||||||
pred_contr_manual <- sweep(cbind(sparse_matrix, 1), 2, coefs, FUN="*")
|
pred_contr_manual <- sweep(cbind(sparse_matrix, 1), 2, coefs, FUN="*")
|
||||||
expect_equal(as.numeric(pred_contr), as.numeric(pred_contr_manual), 1e-5)
|
expect_equal(as.numeric(pred_contr), as.numeric(pred_contr_manual),
|
||||||
|
tolerance = float_tolerance)
|
||||||
|
|
||||||
# gbtree multiclass
|
# gbtree multiclass
|
||||||
pred <- predict(mbst.Tree, as.matrix(iris[, -5]), outputmargin = TRUE, reshape = TRUE)
|
pred <- predict(mbst.Tree, as.matrix(iris[, -5]), outputmargin = TRUE, reshape = TRUE)
|
||||||
@ -123,11 +127,12 @@ test_that("predict feature contributions works", {
|
|||||||
coefs_all <- xgb.dump(mbst.GLM)[-c(1,2,6)] %>% as.numeric %>% matrix(ncol = 3, byrow = TRUE)
|
coefs_all <- xgb.dump(mbst.GLM)[-c(1,2,6)] %>% as.numeric %>% matrix(ncol = 3, byrow = TRUE)
|
||||||
for (g in seq_along(pred_contr)) {
|
for (g in seq_along(pred_contr)) {
|
||||||
expect_equal(colnames(pred_contr[[g]]), c(colnames(iris[, -5]), "BIAS"))
|
expect_equal(colnames(pred_contr[[g]]), c(colnames(iris[, -5]), "BIAS"))
|
||||||
expect_lt(max(abs(rowSums(pred_contr[[g]]) - pred[, g])), 2e-6)
|
expect_lt(max(abs(rowSums(pred_contr[[g]]) - pred[, g])), float_tolerance)
|
||||||
# manual calculation of linear terms
|
# manual calculation of linear terms
|
||||||
coefs <- c(coefs_all[-1, g], coefs_all[1, g]) # intercept needs to be the last
|
coefs <- c(coefs_all[-1, g], coefs_all[1, g]) # intercept needs to be the last
|
||||||
pred_contr_manual <- sweep(as.matrix(cbind(iris[,-5], 1)), 2, coefs, FUN="*")
|
pred_contr_manual <- sweep(as.matrix(cbind(iris[,-5], 1)), 2, coefs, FUN="*")
|
||||||
expect_equal(as.numeric(pred_contr[[g]]), as.numeric(pred_contr_manual), 2e-6)
|
expect_equal(as.numeric(pred_contr[[g]]), as.numeric(pred_contr_manual),
|
||||||
|
tolerance = float_tolerance)
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
|
||||||
@ -171,14 +176,16 @@ if (grepl('Windows', Sys.info()[['sysname']]) ||
|
|||||||
# check that lossless conversion works with 17 digits
|
# check that lossless conversion works with 17 digits
|
||||||
# numeric -> character -> numeric
|
# numeric -> character -> numeric
|
||||||
X <- 10^runif(100, -20, 20)
|
X <- 10^runif(100, -20, 20)
|
||||||
|
if (capabilities('long.double')) {
|
||||||
X2X <- as.numeric(format(X, digits = 17))
|
X2X <- as.numeric(format(X, digits = 17))
|
||||||
expect_identical(X, X2X)
|
expect_identical(X, X2X)
|
||||||
|
}
|
||||||
# retrieved attributes to be the same as written
|
# retrieved attributes to be the same as written
|
||||||
for (x in X) {
|
for (x in X) {
|
||||||
xgb.attr(bst.Tree, "x") <- x
|
xgb.attr(bst.Tree, "x") <- x
|
||||||
expect_identical(as.numeric(xgb.attr(bst.Tree, "x")), x)
|
expect_equal(as.numeric(xgb.attr(bst.Tree, "x")), x, tolerance = float_tolerance)
|
||||||
xgb.attributes(bst.Tree) <- list(a = "A", b = x)
|
xgb.attributes(bst.Tree) <- list(a = "A", b = x)
|
||||||
expect_identical(as.numeric(xgb.attr(bst.Tree, "b")), x)
|
expect_equal(as.numeric(xgb.attr(bst.Tree, "b")), x, tolerance = float_tolerance)
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
@ -187,7 +194,7 @@ test_that("xgb.Booster serializing as R object works", {
|
|||||||
saveRDS(bst.Tree, 'xgb.model.rds')
|
saveRDS(bst.Tree, 'xgb.model.rds')
|
||||||
bst <- readRDS('xgb.model.rds')
|
bst <- readRDS('xgb.model.rds')
|
||||||
dtrain <- xgb.DMatrix(sparse_matrix, label = label)
|
dtrain <- xgb.DMatrix(sparse_matrix, label = label)
|
||||||
expect_equal(predict(bst.Tree, dtrain), predict(bst, dtrain))
|
expect_equal(predict(bst.Tree, dtrain), predict(bst, dtrain), tolerance = float_tolerance)
|
||||||
expect_equal(xgb.dump(bst.Tree), xgb.dump(bst))
|
expect_equal(xgb.dump(bst.Tree), xgb.dump(bst))
|
||||||
xgb.save(bst, 'xgb.model')
|
xgb.save(bst, 'xgb.model')
|
||||||
nil_ptr <- new("externalptr")
|
nil_ptr <- new("externalptr")
|
||||||
@ -195,7 +202,7 @@ test_that("xgb.Booster serializing as R object works", {
|
|||||||
expect_true(identical(bst$handle, nil_ptr))
|
expect_true(identical(bst$handle, nil_ptr))
|
||||||
bst <- xgb.Booster.complete(bst)
|
bst <- xgb.Booster.complete(bst)
|
||||||
expect_true(!identical(bst$handle, nil_ptr))
|
expect_true(!identical(bst$handle, nil_ptr))
|
||||||
expect_equal(predict(bst.Tree, dtrain), predict(bst, dtrain))
|
expect_equal(predict(bst.Tree, dtrain), predict(bst, dtrain), tolerance = float_tolerance)
|
||||||
})
|
})
|
||||||
|
|
||||||
test_that("xgb.model.dt.tree works with and without feature names", {
|
test_that("xgb.model.dt.tree works with and without feature names", {
|
||||||
@ -233,13 +240,14 @@ test_that("xgb.importance works with and without feature names", {
|
|||||||
expect_output(str(importance.Tree), 'Feature.*\\"Age\\"')
|
expect_output(str(importance.Tree), 'Feature.*\\"Age\\"')
|
||||||
|
|
||||||
importance.Tree.0 <- xgb.importance(model = bst.Tree)
|
importance.Tree.0 <- xgb.importance(model = bst.Tree)
|
||||||
expect_equal(importance.Tree, importance.Tree.0)
|
expect_equal(importance.Tree, importance.Tree.0, tolerance = float_tolerance)
|
||||||
|
|
||||||
# when model contains no feature names:
|
# when model contains no feature names:
|
||||||
bst.Tree.x <- bst.Tree
|
bst.Tree.x <- bst.Tree
|
||||||
bst.Tree.x$feature_names <- NULL
|
bst.Tree.x$feature_names <- NULL
|
||||||
importance.Tree.x <- xgb.importance(model = bst.Tree)
|
importance.Tree.x <- xgb.importance(model = bst.Tree)
|
||||||
expect_equal(importance.Tree[, -1, with=FALSE], importance.Tree.x[, -1, with=FALSE])
|
expect_equal(importance.Tree[, -1, with=FALSE], importance.Tree.x[, -1, with=FALSE],
|
||||||
|
tolerance = float_tolerance)
|
||||||
|
|
||||||
imp2plot <- xgb.plot.importance(importance_matrix = importance.Tree)
|
imp2plot <- xgb.plot.importance(importance_matrix = importance.Tree)
|
||||||
expect_equal(colnames(imp2plot), c("Feature", "Gain", "Cover", "Frequency", "Importance"))
|
expect_equal(colnames(imp2plot), c("Feature", "Gain", "Cover", "Frequency", "Importance"))
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user