[R] many minor changes to increase the robustness of the R code (#2404)

* many minor changes to increase robustness of R code

* fixing which mistake in xgb.model.dt.tree.R and a few cosmetics
This commit is contained in:
Bernie Gray 2017-06-15 23:56:23 -04:00 committed by Vadim Khotilovich
parent 0db37c05bd
commit cd7659937b
8 changed files with 23 additions and 23 deletions

View File

@ -127,7 +127,7 @@ cb.evaluation.log <- function() {
# rearrange col order from _mean,_mean,...,_std,_std,...
# to be _mean,_std,_mean,_std,...
len <- length(mnames)
means <- mnames[1:(len/2)]
means <- mnames[seq_len(len/2)]
stds <- mnames[(len/2 + 1):len]
cnames <- numeric(len)
cnames[c(TRUE, FALSE)] <- means
@ -320,9 +320,9 @@ cb.early.stop <- function(stopping_rounds, maximize = FALSE,
metric_name <<- eval_names[metric_idx]
# maximixe is usually NULL when not set in xgb.train and built-in metrics
# maximize is usually NULL when not set in xgb.train and built-in metrics
if (is.null(maximize))
maximize <<- ifelse(grepl('(_auc|_map|_ndcg)', metric_name), TRUE, FALSE)
maximize <<- grepl('(_auc|_map|_ndcg)', metric_name)
if (verbose && NVL(env$rank, 0) == 0)
cat("Will train until ", metric_name, " hasn't improved in ",
@ -597,7 +597,7 @@ has.callbacks <- function(cb_list, query_names) {
return(FALSE)
if (!is.list(cb_list) ||
any(sapply(cb_list, class) != 'function')) {
stop('`cb_list`` must be a list of callback functions')
stop('`cb_list` must be a list of callback functions')
}
cb_names <- callback.names(cb_list)
if (!is.character(cb_names) ||

View File

@ -185,7 +185,7 @@ generate.cv.folds <- function(nfold, nrows, stratified, label, params) {
"\tConsider providing pre-computed CV-folds through the 'folds=' parameter.\n")
}
# shuffle
rnd_idx <- sample(1:nrows)
rnd_idx <- sample.int(nrows)
if (stratified &&
length(label) == length(rnd_idx)) {
y <- label[rnd_idx]
@ -211,9 +211,9 @@ generate.cv.folds <- function(nfold, nrows, stratified, label, params) {
# make simple non-stratified folds
kstep <- length(rnd_idx) %/% nfold
folds <- list()
for (i in 1:(nfold - 1)) {
folds[[i]] <- rnd_idx[1:kstep]
rnd_idx <- rnd_idx[-(1:kstep)]
for (i in seq_len(nfold - 1)) {
folds[[i]] <- rnd_idx[seq_len(kstep)]
rnd_idx <- rnd_idx[-seq_len(kstep)]
}
folds[[nfold]] <- rnd_idx
}
@ -254,15 +254,15 @@ xgb.createFolds <- function(y, k = 10)
## For each class, balance the fold allocation as far
## as possible, then resample the remainder.
## The final assignment of folds is also randomized.
for (i in 1:length(numInClass)) {
for (i in seq_along(numInClass)) {
## create a vector of integers from 1:k as many times as possible without
## going over the number of samples in the class. Note that if the number
## of samples in a class is less than k, nothing is producd here.
seqVector <- rep(1:k, numInClass[i] %/% k)
seqVector <- rep(seq_len(k), numInClass[i] %/% k)
## add enough random integers to get length(seqVector) == numInClass[i]
if (numInClass[i] %% k > 0) seqVector <- c(seqVector, sample(1:k, numInClass[i] %% k))
if (numInClass[i] %% k > 0) seqVector <- c(seqVector, sample.int(k, numInClass[i] %% k))
## shuffle the integers for fold assignment and assign to this classes's data
foldVector[which(y == dimnames(numInClass)$y[i])] <- sample(seqVector)
foldVector[y == dimnames(numInClass)$y[i]] <- sample(seqVector)
}
} else {
foldVector <- seq(along = y)

View File

@ -2,7 +2,7 @@
# internal utility function
xgb.Booster.handle <- function(params = list(), cachelist = list(), modelfile = NULL) {
if (typeof(cachelist) != "list" ||
!all(sapply(cachelist, inherits, 'xgb.DMatrix'))) {
!all(vapply(cachelist, inherits, logical(1), what = 'xgb.DMatrix'))) {
stop("cachelist must be a list of xgb.DMatrix objects")
}
@ -296,9 +296,9 @@ predict.xgb.Booster <- function(object, newdata, missing = NA, outputmargin = FA
} else if (n_group == 1) {
matrix(ret, nrow = n_row, byrow = TRUE, dimnames = dnames)
} else {
grp_mask <- rep(1:n_col1, n_row) +
rep((0:(n_row - 1)) * n_col1 * n_group, each = n_col1)
lapply(1:n_group, function(g) {
grp_mask <- rep(seq_len(n_col1), n_row) +
rep((seq_len(n_row) - 1) * n_col1 * n_group, each = n_col1)
lapply(seq_len(n_group), function(g) {
matrix(ret[grp_mask + n_col1 * (g - 1)], nrow = n_row, byrow = TRUE, dimnames = dnames)
})
}

View File

@ -305,7 +305,7 @@ slice.xgb.DMatrix <- function(object, idxset, ...) {
ind <- which(len == nr)
if (length(ind) > 0) {
nms <- names(attr_list)[ind]
for (i in 1:length(ind)) {
for (i in seq_along(ind)) {
attr(ret, nms[i]) <- attr(object, nms[i])[idxset]
}
}

View File

@ -178,7 +178,7 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing =
# create the booster-folds
dall <- xgb.get.DMatrix(data, label, missing)
bst_folds <- lapply(1:length(folds), function(k) {
bst_folds <- lapply(seq_along(folds), function(k) {
dtest <- slice(dall, folds[[k]])
dtrain <- slice(dall, unlist(folds[-k]))
handle <- xgb.Booster.handle(params, list(dtrain, dtest))

View File

@ -115,7 +115,7 @@ xgb.importance <- function(feature_names = NULL, model = NULL, trees = NULL,
} else {
data.table(Feature = rep(feature_names, each = num_class),
Weight = weights,
Class = 0:(num_class - 1))[order(Class, -abs(Weight))]
Class = seq_len(num_class) - 1)[order(Class, -abs(Weight))]
}
} else {
# tree model

View File

@ -65,7 +65,7 @@ xgb.model.dt.tree <- function(feature_names = NULL, model = NULL, text = NULL,
trees = NULL, use_int_id = FALSE, ...){
check.deprecation(...)
if (!inherits(model, "xgb.Booster") & !is.character(text)) {
if (!inherits(model, "xgb.Booster") && !is.character(text)) {
stop("Either 'model' must be an object of class xgb.Booster\n",
" or 'text' must be a character vector with the result of xgb.dump\n",
" (or NULL if 'model' was provided).")
@ -121,7 +121,7 @@ xgb.model.dt.tree <- function(feature_names = NULL, model = NULL, text = NULL,
# skip some indices with spurious capture groups from anynumber_regex
xtr <- stri_match_first_regex(t, branch_rx)[, c(2,3,5,6,7,8,10), drop = FALSE]
xtr[, 3:5] <- add.tree.id(xtr[, 3:5], Tree)
lapply(1:ncol(xtr), function(i) xtr[,i])
lapply(seq_len(ncol(xtr)), function(i) xtr[,i])
}]
# assign feature_names when available
if (!is.null(feature_names)) {
@ -136,7 +136,7 @@ xgb.model.dt.tree <- function(feature_names = NULL, model = NULL, text = NULL,
td[isLeaf == TRUE,
(leaf_cols) := {
xtr <- stri_match_first_regex(t, leaf_rx)[, c(2,4)]
c("Leaf", lapply(1:ncol(xtr), function(i) xtr[,i]))
c("Leaf", lapply(seq_len(ncol(xtr)), function(i) xtr[,i]))
}]
# convert some columns to numeric

View File

@ -251,7 +251,7 @@ xgb.train <- function(params = list(), data, nrounds, watchlist = list(),
stop("second argument dtrain must be xgb.DMatrix")
if (length(watchlist) > 0) {
if (typeof(watchlist) != "list" ||
!all(sapply(watchlist, inherits, 'xgb.DMatrix')))
!all(vapply(watchlist, inherits, logical(1), what = 'xgb.DMatrix')))
stop("watchlist must be a list of xgb.DMatrix elements")
evnames <- names(watchlist)
if (is.null(evnames) || any(evnames == ""))