[R] many minor changes to increase the robustness of the R code (#2404)
* many minor changes to increase robustness of R code * fixing which mistake in xgb.model.dt.tree.R and a few cosmetics
This commit is contained in:
parent
0db37c05bd
commit
cd7659937b
@ -127,7 +127,7 @@ cb.evaluation.log <- function() {
|
||||
# rearrange col order from _mean,_mean,...,_std,_std,...
|
||||
# to be _mean,_std,_mean,_std,...
|
||||
len <- length(mnames)
|
||||
means <- mnames[1:(len/2)]
|
||||
means <- mnames[seq_len(len/2)]
|
||||
stds <- mnames[(len/2 + 1):len]
|
||||
cnames <- numeric(len)
|
||||
cnames[c(TRUE, FALSE)] <- means
|
||||
@ -320,9 +320,9 @@ cb.early.stop <- function(stopping_rounds, maximize = FALSE,
|
||||
|
||||
metric_name <<- eval_names[metric_idx]
|
||||
|
||||
# maximixe is usually NULL when not set in xgb.train and built-in metrics
|
||||
# maximize is usually NULL when not set in xgb.train and built-in metrics
|
||||
if (is.null(maximize))
|
||||
maximize <<- ifelse(grepl('(_auc|_map|_ndcg)', metric_name), TRUE, FALSE)
|
||||
maximize <<- grepl('(_auc|_map|_ndcg)', metric_name)
|
||||
|
||||
if (verbose && NVL(env$rank, 0) == 0)
|
||||
cat("Will train until ", metric_name, " hasn't improved in ",
|
||||
@ -597,7 +597,7 @@ has.callbacks <- function(cb_list, query_names) {
|
||||
return(FALSE)
|
||||
if (!is.list(cb_list) ||
|
||||
any(sapply(cb_list, class) != 'function')) {
|
||||
stop('`cb_list`` must be a list of callback functions')
|
||||
stop('`cb_list` must be a list of callback functions')
|
||||
}
|
||||
cb_names <- callback.names(cb_list)
|
||||
if (!is.character(cb_names) ||
|
||||
|
||||
@ -185,7 +185,7 @@ generate.cv.folds <- function(nfold, nrows, stratified, label, params) {
|
||||
"\tConsider providing pre-computed CV-folds through the 'folds=' parameter.\n")
|
||||
}
|
||||
# shuffle
|
||||
rnd_idx <- sample(1:nrows)
|
||||
rnd_idx <- sample.int(nrows)
|
||||
if (stratified &&
|
||||
length(label) == length(rnd_idx)) {
|
||||
y <- label[rnd_idx]
|
||||
@ -211,9 +211,9 @@ generate.cv.folds <- function(nfold, nrows, stratified, label, params) {
|
||||
# make simple non-stratified folds
|
||||
kstep <- length(rnd_idx) %/% nfold
|
||||
folds <- list()
|
||||
for (i in 1:(nfold - 1)) {
|
||||
folds[[i]] <- rnd_idx[1:kstep]
|
||||
rnd_idx <- rnd_idx[-(1:kstep)]
|
||||
for (i in seq_len(nfold - 1)) {
|
||||
folds[[i]] <- rnd_idx[seq_len(kstep)]
|
||||
rnd_idx <- rnd_idx[-seq_len(kstep)]
|
||||
}
|
||||
folds[[nfold]] <- rnd_idx
|
||||
}
|
||||
@ -254,15 +254,15 @@ xgb.createFolds <- function(y, k = 10)
|
||||
## For each class, balance the fold allocation as far
|
||||
## as possible, then resample the remainder.
|
||||
## The final assignment of folds is also randomized.
|
||||
for (i in 1:length(numInClass)) {
|
||||
for (i in seq_along(numInClass)) {
|
||||
## create a vector of integers from 1:k as many times as possible without
|
||||
## going over the number of samples in the class. Note that if the number
|
||||
## of samples in a class is less than k, nothing is producd here.
|
||||
seqVector <- rep(1:k, numInClass[i] %/% k)
|
||||
seqVector <- rep(seq_len(k), numInClass[i] %/% k)
|
||||
## add enough random integers to get length(seqVector) == numInClass[i]
|
||||
if (numInClass[i] %% k > 0) seqVector <- c(seqVector, sample(1:k, numInClass[i] %% k))
|
||||
if (numInClass[i] %% k > 0) seqVector <- c(seqVector, sample.int(k, numInClass[i] %% k))
|
||||
## shuffle the integers for fold assignment and assign to this classes's data
|
||||
foldVector[which(y == dimnames(numInClass)$y[i])] <- sample(seqVector)
|
||||
foldVector[y == dimnames(numInClass)$y[i]] <- sample(seqVector)
|
||||
}
|
||||
} else {
|
||||
foldVector <- seq(along = y)
|
||||
|
||||
@ -2,7 +2,7 @@
|
||||
# internal utility function
|
||||
xgb.Booster.handle <- function(params = list(), cachelist = list(), modelfile = NULL) {
|
||||
if (typeof(cachelist) != "list" ||
|
||||
!all(sapply(cachelist, inherits, 'xgb.DMatrix'))) {
|
||||
!all(vapply(cachelist, inherits, logical(1), what = 'xgb.DMatrix'))) {
|
||||
stop("cachelist must be a list of xgb.DMatrix objects")
|
||||
}
|
||||
|
||||
@ -296,9 +296,9 @@ predict.xgb.Booster <- function(object, newdata, missing = NA, outputmargin = FA
|
||||
} else if (n_group == 1) {
|
||||
matrix(ret, nrow = n_row, byrow = TRUE, dimnames = dnames)
|
||||
} else {
|
||||
grp_mask <- rep(1:n_col1, n_row) +
|
||||
rep((0:(n_row - 1)) * n_col1 * n_group, each = n_col1)
|
||||
lapply(1:n_group, function(g) {
|
||||
grp_mask <- rep(seq_len(n_col1), n_row) +
|
||||
rep((seq_len(n_row) - 1) * n_col1 * n_group, each = n_col1)
|
||||
lapply(seq_len(n_group), function(g) {
|
||||
matrix(ret[grp_mask + n_col1 * (g - 1)], nrow = n_row, byrow = TRUE, dimnames = dnames)
|
||||
})
|
||||
}
|
||||
|
||||
@ -305,7 +305,7 @@ slice.xgb.DMatrix <- function(object, idxset, ...) {
|
||||
ind <- which(len == nr)
|
||||
if (length(ind) > 0) {
|
||||
nms <- names(attr_list)[ind]
|
||||
for (i in 1:length(ind)) {
|
||||
for (i in seq_along(ind)) {
|
||||
attr(ret, nms[i]) <- attr(object, nms[i])[idxset]
|
||||
}
|
||||
}
|
||||
|
||||
@ -178,7 +178,7 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing =
|
||||
|
||||
# create the booster-folds
|
||||
dall <- xgb.get.DMatrix(data, label, missing)
|
||||
bst_folds <- lapply(1:length(folds), function(k) {
|
||||
bst_folds <- lapply(seq_along(folds), function(k) {
|
||||
dtest <- slice(dall, folds[[k]])
|
||||
dtrain <- slice(dall, unlist(folds[-k]))
|
||||
handle <- xgb.Booster.handle(params, list(dtrain, dtest))
|
||||
|
||||
@ -115,7 +115,7 @@ xgb.importance <- function(feature_names = NULL, model = NULL, trees = NULL,
|
||||
} else {
|
||||
data.table(Feature = rep(feature_names, each = num_class),
|
||||
Weight = weights,
|
||||
Class = 0:(num_class - 1))[order(Class, -abs(Weight))]
|
||||
Class = seq_len(num_class) - 1)[order(Class, -abs(Weight))]
|
||||
}
|
||||
} else {
|
||||
# tree model
|
||||
|
||||
@ -65,7 +65,7 @@ xgb.model.dt.tree <- function(feature_names = NULL, model = NULL, text = NULL,
|
||||
trees = NULL, use_int_id = FALSE, ...){
|
||||
check.deprecation(...)
|
||||
|
||||
if (!inherits(model, "xgb.Booster") & !is.character(text)) {
|
||||
if (!inherits(model, "xgb.Booster") && !is.character(text)) {
|
||||
stop("Either 'model' must be an object of class xgb.Booster\n",
|
||||
" or 'text' must be a character vector with the result of xgb.dump\n",
|
||||
" (or NULL if 'model' was provided).")
|
||||
@ -121,7 +121,7 @@ xgb.model.dt.tree <- function(feature_names = NULL, model = NULL, text = NULL,
|
||||
# skip some indices with spurious capture groups from anynumber_regex
|
||||
xtr <- stri_match_first_regex(t, branch_rx)[, c(2,3,5,6,7,8,10), drop = FALSE]
|
||||
xtr[, 3:5] <- add.tree.id(xtr[, 3:5], Tree)
|
||||
lapply(1:ncol(xtr), function(i) xtr[,i])
|
||||
lapply(seq_len(ncol(xtr)), function(i) xtr[,i])
|
||||
}]
|
||||
# assign feature_names when available
|
||||
if (!is.null(feature_names)) {
|
||||
@ -136,7 +136,7 @@ xgb.model.dt.tree <- function(feature_names = NULL, model = NULL, text = NULL,
|
||||
td[isLeaf == TRUE,
|
||||
(leaf_cols) := {
|
||||
xtr <- stri_match_first_regex(t, leaf_rx)[, c(2,4)]
|
||||
c("Leaf", lapply(1:ncol(xtr), function(i) xtr[,i]))
|
||||
c("Leaf", lapply(seq_len(ncol(xtr)), function(i) xtr[,i]))
|
||||
}]
|
||||
|
||||
# convert some columns to numeric
|
||||
|
||||
@ -251,7 +251,7 @@ xgb.train <- function(params = list(), data, nrounds, watchlist = list(),
|
||||
stop("second argument dtrain must be xgb.DMatrix")
|
||||
if (length(watchlist) > 0) {
|
||||
if (typeof(watchlist) != "list" ||
|
||||
!all(sapply(watchlist, inherits, 'xgb.DMatrix')))
|
||||
!all(vapply(watchlist, inherits, logical(1), what = 'xgb.DMatrix')))
|
||||
stop("watchlist must be a list of xgb.DMatrix elements")
|
||||
evnames <- names(watchlist)
|
||||
if (is.null(evnames) || any(evnames == ""))
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user