diff --git a/R-package/R/predict.xgb.Booster.R b/R-package/R/predict.xgb.Booster.R
index 9cc1867da..432581e76 100644
--- a/R-package/R/predict.xgb.Booster.R
+++ b/R-package/R/predict.xgb.Booster.R
@@ -48,7 +48,7 @@ setMethod("predict", signature = "xgb.Booster",
stop("predict: ntreelimit must be equal to or greater than 1")
}
}
- option = 0
+ option <- 0
if (outputmargin) {
option <- option + 1
}
diff --git a/R-package/R/slice.xgb.DMatrix.R b/R-package/R/slice.xgb.DMatrix.R
index d8ef8cb9c..3b025e1dd 100644
--- a/R-package/R/slice.xgb.DMatrix.R
+++ b/R-package/R/slice.xgb.DMatrix.R
@@ -30,12 +30,12 @@ setMethod("slice", signature = "xgb.DMatrix",
}
ret <- .Call("XGDMatrixSliceDMatrix_R", object, idxset,
PACKAGE = "xgboost")
-
+
attr_list <- attributes(object)
nr <- xgb.numrow(object)
len <- sapply(attr_list,length)
- ind <- which(len==nr)
- if (length(ind)>0) {
+ ind <- which(len == nr)
+ if (length(ind) > 0) {
nms <- names(attr_list)[ind]
for (i in 1:length(ind)) {
attr(ret,nms[i]) <- attr(object,nms[i])[idxset]
diff --git a/R-package/R/utils.R b/R-package/R/utils.R
index 459eb068e..b4f4a371f 100644
--- a/R-package/R/utils.R
+++ b/R-package/R/utils.R
@@ -1,4 +1,4 @@
-#' @importClassesFrom Matrix dgCMatrix dgeMatrix
+ #' @importClassesFrom Matrix dgCMatrix dgeMatrix
#' @import methods
# depends on matrix
@@ -15,14 +15,14 @@ xgb.setinfo <- function(dmat, name, info) {
stop("xgb.setinfo: first argument dtrain must be xgb.DMatrix")
}
if (name == "label") {
- if (length(info)!=xgb.numrow(dmat))
+ if (length(info) != xgb.numrow(dmat))
stop("The length of labels must equal to the number of rows in the input data")
.Call("XGDMatrixSetInfo_R", dmat, name, as.numeric(info),
PACKAGE = "xgboost")
return(TRUE)
}
if (name == "weight") {
- if (length(info)!=xgb.numrow(dmat))
+ if (length(info) != xgb.numrow(dmat))
stop("The length of weights must equal to the number of rows in the input data")
.Call("XGDMatrixSetInfo_R", dmat, name, as.numeric(info),
PACKAGE = "xgboost")
@@ -36,7 +36,7 @@ xgb.setinfo <- function(dmat, name, info) {
return(TRUE)
}
if (name == "group") {
- if (sum(info)!=xgb.numrow(dmat))
+ if (sum(info) != xgb.numrow(dmat))
stop("The sum of groups must equal to the number of rows in the input data")
.Call("XGDMatrixSetInfo_R", dmat, name, as.integer(info),
PACKAGE = "xgboost")
@@ -68,7 +68,7 @@ xgb.Booster <- function(params = list(), cachelist = list(), modelfile = NULL) {
if (typeof(modelfile) == "character") {
.Call("XGBoosterLoadModel_R", handle, modelfile, PACKAGE = "xgboost")
} else if (typeof(modelfile) == "raw") {
- .Call("XGBoosterLoadModelFromRaw_R", handle, modelfile, PACKAGE = "xgboost")
+ .Call("XGBoosterLoadModelFromRaw_R", handle, modelfile, PACKAGE = "xgboost")
} else {
stop("xgb.Booster: modelfile must be character or raw vector")
}
@@ -122,7 +122,7 @@ xgb.get.DMatrix <- function(data, label = NULL, missing = NA, weight = NULL) {
} else if (inClass == "xgb.DMatrix") {
dtrain <- data
} else if (inClass == "data.frame") {
- stop("xgboost only support numerical matrix input,
+ stop("xgboost only support numerical matrix input,
use 'data.frame' to transform the data.")
} else {
stop("xgboost: Invalid input of data")
@@ -156,7 +156,7 @@ xgb.iter.update <- function(booster, dtrain, iter, obj = NULL) {
}
if (is.null(obj)) {
- .Call("XGBoosterUpdateOneIter_R", booster, as.integer(iter), dtrain,
+ .Call("XGBoosterUpdateOneIter_R", booster, as.integer(iter), dtrain,
PACKAGE = "xgboost")
} else {
pred <- predict(booster, dtrain)
@@ -189,9 +189,9 @@ xgb.iter.eval <- function(booster, watchlist, iter, feval = NULL, prediction = F
}
evnames <- append(evnames, names(w))
}
- msg <- .Call("XGBoosterEvalOneIter_R", booster, as.integer(iter), watchlist,
+ msg <- .Call("XGBoosterEvalOneIter_R", booster, as.integer(iter), watchlist,
evnames, PACKAGE = "xgboost")
- } else {
+ } else {
msg <- paste("[", iter, "]", sep="")
for (j in 1:length(watchlist)) {
w <- watchlist[j]
@@ -247,11 +247,11 @@ xgb.cv.mknfold <- function(dall, nfold, param, stratified, folds) {
if (length(unique(y)) <= 5) y <- factor(y)
}
folds <- xgb.createFolds(y, nfold)
- } else {
+ } else {
# make simple non-stratified folds
kstep <- length(randidx) %/% nfold
folds <- list()
- for (i in 1:(nfold-1)) {
+ for (i in 1:(nfold - 1)) {
folds[[i]] <- randidx[1:kstep]
randidx <- setdiff(randidx, folds[[i]])
}
@@ -261,7 +261,7 @@ xgb.cv.mknfold <- function(dall, nfold, param, stratified, folds) {
ret <- list()
for (k in 1:nfold) {
dtest <- slice(dall, folds[[k]])
- didx = c()
+ didx <- c()
for (i in 1:nfold) {
if (i != k) {
didx <- append(didx, folds[[i]])
@@ -282,7 +282,7 @@ xgb.cv.aggcv <- function(res, showsd = TRUE) {
kv <- strsplit(header[i], ":")[[1]]
ret <- paste(ret, "\t", kv[1], ":", sep="")
stats <- c()
- stats[1] <- as.numeric(kv[2])
+ stats[1] <- as.numeric(kv[2])
for (j in 2:length(res)) {
tkv <- strsplit(res[[j]][i], ":")[[1]]
stats[j] <- as.numeric(tkv[2])
@@ -310,9 +310,9 @@ xgb.createFolds <- function(y, k = 10)
## At most, we will use quantiles. If the sample
## is too small, we just do regular unstratified
## CV
- cuts <- floor(length(y)/k)
- if(cuts < 2) cuts <- 2
- if(cuts > 5) cuts <- 5
+ cuts <- floor(length(y) / k)
+ if (cuts < 2) cuts <- 2
+ if (cuts > 5) cuts <- 5
y <- cut(y,
unique(stats::quantile(y, probs = seq(0, 1, length = cuts))),
include.lowest = TRUE)
@@ -324,7 +324,7 @@ xgb.createFolds <- function(y, k = 10)
y <- factor(as.character(y))
numInClass <- table(y)
foldVector <- vector(mode = "integer", length(y))
-
+
## For each class, balance the fold allocation as far
## as possible, then resample the remainder.
## The final assignment of folds is also randomized.
diff --git a/R-package/R/xgb.cv.R b/R-package/R/xgb.cv.R
index 3f1be704f..5f964c4f8 100644
--- a/R-package/R/xgb.cv.R
+++ b/R-package/R/xgb.cv.R
@@ -118,23 +118,23 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing =
for (mc in metrics) {
params <- append(params, list("eval_metric"=mc))
}
-
+
# customized objective and evaluation metric interface
if (!is.null(params$objective) && !is.null(obj))
stop("xgb.cv: cannot assign two different objectives")
if (!is.null(params$objective))
if (class(params$objective) == 'function') {
- obj = params$objective
- params[['objective']] = NULL
+ obj <- params$objective
+ params[['objective']] <- NULL
}
# if (!is.null(params$eval_metric) && !is.null(feval))
# stop("xgb.cv: cannot assign two different evaluation metrics")
if (!is.null(params$eval_metric))
- if (class(params$eval_metric)=='function') {
- feval = params$eval_metric
- params[['eval_metric']] = NULL
+ if (class(params$eval_metric) == 'function') {
+ feval <- params$eval_metric
+ params[['eval_metric']] <- NULL
}
-
+
# Early Stopping
if (!is.null(early.stop.round)){
if (!is.null(feval) && is.null(maximize))
@@ -144,12 +144,12 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing =
if (is.null(maximize))
{
if (params$eval_metric %in% c('rmse','logloss','error','merror','mlogloss')) {
- maximize = FALSE
+ maximize <- FALSE
} else {
- maximize = TRUE
+ maximize <- TRUE
}
}
-
+
if (maximize) {
bestScore <- 0
} else {
@@ -157,26 +157,26 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing =
}
bestInd <- 0
earlyStopflag <- FALSE
-
- if (length(metrics)>1)
+
+ if (length(metrics) > 1)
warning('Only the first metric is used for early stopping process.')
}
-
+
xgb_folds <- xgb.cv.mknfold(dtrain, nfold, params, stratified, folds)
obj_type <- params[['objective']]
mat_pred <- FALSE
if (!is.null(obj_type) && obj_type == 'multi:softprob')
{
- num_class = params[['num_class']]
+ num_class <- params[['num_class']]
if (is.null(num_class))
stop('must set num_class to use softmax')
predictValues <- matrix(0,xgb.numrow(dtrain),num_class)
- mat_pred = TRUE
+ mat_pred <- TRUE
}
else
predictValues <- rep(0,xgb.numrow(dtrain))
history <- c()
- print.every.n = max(as.integer(print.every.n), 1L)
+ print.every.n <- max(as.integer(print.every.n), 1L)
for (i in 1:nrounds) {
msg <- list()
for (k in 1:nfold) {
@@ -187,28 +187,27 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing =
ret <- xgb.cv.aggcv(msg, showsd)
history <- c(history, ret)
if(verbose)
- if (0 == (i-1L)%%print.every.n)
+ if (0 == (i - 1L) %% print.every.n)
cat(ret, "\n", sep="")
-
+
# early_Stopping
if (!is.null(early.stop.round)){
- score <- strsplit(ret,'\\s+')[[1]][1+length(metrics)+2]
+ score <- strsplit(ret,'\\s+')[[1]][1 + length(metrics) + 2]
score <- strsplit(score,'\\+|:')[[1]][[2]]
score <- as.numeric(score)
- if ((maximize && score > bestScore) || (!maximize && score < bestScore)) {
+ if ( (maximize && score > bestScore) || (!maximize && score < bestScore)) {
bestScore <- score
bestInd <- i
} else {
- if (i-bestInd >= early.stop.round) {
+ if (i - bestInd >= early.stop.round) {
earlyStopflag <- TRUE
cat('Stopping. Best iteration:',bestInd)
break
}
}
}
-
}
-
+
if (prediction) {
for (k in 1:nfold) {
fd <- xgb_folds[[k]]
@@ -225,24 +224,23 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing =
}
}
}
-
-
+
colnames <- str_split(string = history[1], pattern = "\t")[[1]] %>% .[2:length(.)] %>% str_extract(".*:") %>% str_replace(":","") %>% str_replace("-", ".")
colnamesMean <- paste(colnames, "mean")
if(showsd) colnamesStd <- paste(colnames, "std")
-
+
colnames <- c()
if(showsd) for(i in 1:length(colnamesMean)) colnames <- c(colnames, colnamesMean[i], colnamesStd[i])
else colnames <- colnamesMean
-
+
type <- rep(x = "numeric", times = length(colnames))
dt <- utils::read.table(text = "", colClasses = type, col.names = colnames) %>% as.data.table
split <- str_split(string = history, pattern = "\t")
-
- for(line in split) dt <- line[2:length(line)] %>% str_extract_all(pattern = "\\d*\\.+\\d*") %>% unlist %>% as.numeric %>% as.list %>% {rbindlist(list(dt, .), use.names = F, fill = F)}
-
+
+ for(line in split) dt <- line[2:length(line)] %>% str_extract_all(pattern = "\\d*\\.+\\d*") %>% unlist %>% as.numeric %>% as.list %>% {rbindlist( list( dt, .), use.names = F, fill = F)}
+
if (prediction) {
- return(list(dt = dt,pred = predictValues))
+ return( list( dt = dt,pred = predictValues))
}
return(dt)
}
diff --git a/R-package/R/xgb.importance.R b/R-package/R/xgb.importance.R
index 0b0703587..478438a79 100644
--- a/R-package/R/xgb.importance.R
+++ b/R-package/R/xgb.importance.R
@@ -66,8 +66,8 @@
#' xgb.importance(train$data@@Dimnames[[2]], model = bst, data = train$data, label = train$label)
#'
#' @export
-xgb.importance <- function(feature_names = NULL, filename_dump = NULL, model = NULL, data = NULL, label = NULL, target = function(x) ((x + label) == 2)){
- if (!class(feature_names) %in% c("character", "NULL")) {
+xgb.importance <- function(feature_names = NULL, filename_dump = NULL, model = NULL, data = NULL, label = NULL, target = function(x) ( (x + label) == 2)){
+ if (!class(feature_names) %in% c("character", "NULL")) {
stop("feature_names: Has to be a vector of character or NULL if the model dump already contains feature name. Look at this function documentation to see where to get feature names.")
}
@@ -79,7 +79,7 @@ xgb.importance <- function(feature_names = NULL, filename_dump = NULL, model = N
stop("model: Has to be an object of class xgb.Booster model generaged by the xgb.train function.")
}
- if((is.null(data) & !is.null(label)) |(!is.null(data) & is.null(label))) {
+ if((is.null(data) & !is.null(label)) | (!is.null(data) & is.null(label))) {
stop("data/label: Provide the two arguments if you want co-occurence computation or none of them if you are not interested but not one of them only.")
}
@@ -98,7 +98,7 @@ xgb.importance <- function(feature_names = NULL, filename_dump = NULL, model = N
if(!is.null(data) | !is.null(label)) warning("data/label: these parameters should only be provided with decision tree based models.")
} else {
result <- treeDump(feature_names, text = text, keepDetail = !is.null(data))
-
+
# Co-occurence computation
if(!is.null(data) & !is.null(label) & nrow(result) > 0) {
# Take care of missing column
@@ -109,9 +109,9 @@ xgb.importance <- function(feature_names = NULL, filename_dump = NULL, model = N
# Apply split
d <- data[, result[,Feature], drop=FALSE] < as.numeric(result[,Split])
apply(c & d, 2, . %>% target %>% sum) -> vec
-
- result <- result[, "RealCover":= as.numeric(vec), with = F][, "RealCover %" := RealCover / sum(label)][,MissingNo:=NULL]
- }
+
+ result <- result[, "RealCover" := as.numeric(vec), with = F][, "RealCover %" := RealCover / sum(label)][,MissingNo := NULL]
+ }
}
result
}
@@ -119,13 +119,13 @@ xgb.importance <- function(feature_names = NULL, filename_dump = NULL, model = N
treeDump <- function(feature_names, text, keepDetail){
if(keepDetail) groupBy <- c("Feature", "Split", "MissingNo") else groupBy <- "Feature"
- result <- xgb.model.dt.tree(feature_names = feature_names, text = text)[,"MissingNo":= Missing == No ][Feature!="Leaf",.(Gain = sum(Quality), Cover = sum(Cover), Frequence = .N), by = groupBy, with = T][,`:=`(Gain = Gain/sum(Gain), Cover = Cover/sum(Cover), Frequence = Frequence/sum(Frequence))][order(Gain, decreasing = T)]
+ result <- xgb.model.dt.tree(feature_names = feature_names, text = text)[,"MissingNo" := Missing == No ][Feature != "Leaf",.(Gain = sum(Quality), Cover = sum(Cover), Frequence = .N), by = groupBy, with = T][,`:=`(Gain = Gain / sum(Gain), Cover = Cover / sum(Cover), Frequence = Frequence / sum(Frequence))][order(Gain, decreasing = T)]
result
}
linearDump <- function(feature_names, text){
- which(text == "weight:") %>% {a=.+1;text[a:length(text)]} %>% as.numeric %>% data.table(Feature = feature_names, Weight = .)
+ which(text == "weight:") %>% {a =. + 1; text[a:length(text)]} %>% as.numeric %>% data.table(Feature = feature_names, Weight = .)
}
# Avoid error messages during CRAN check.
diff --git a/R-package/R/xgb.model.dt.tree.R b/R-package/R/xgb.model.dt.tree.R
index cef988962..882ac6c1f 100644
--- a/R-package/R/xgb.model.dt.tree.R
+++ b/R-package/R/xgb.model.dt.tree.R
@@ -57,7 +57,7 @@
#' @export
xgb.model.dt.tree <- function(feature_names = NULL, filename_dump = NULL, model = NULL, text = NULL, n_first_tree = NULL){
- if (!class(feature_names) %in% c("character", "NULL")) {
+ if (!class(feature_names) %in% c("character", "NULL")) {
stop("feature_names: Has to be a vector of character or NULL if the model dump already contains feature name. Look at this function documentation to see where to get feature names.")
}
if (!(class(filename_dump) %in% c("character", "NULL") && length(filename_dump) <= 1)) {
@@ -81,12 +81,12 @@ xgb.model.dt.tree <- function(feature_names = NULL, filename_dump = NULL, model
}
if(!is.null(model)){
- text = xgb.dump(model = model, with.stats = T)
+ text <- xgb.dump(model = model, with.stats = T)
} else if(!is.null(filename_dump)){
text <- readLines(filename_dump) %>% str_trim(side = "both")
}
- position <- str_match(text, "booster") %>% is.na %>% not %>% which %>% c(length(text)+1)
+ position <- str_match(text, "booster") %>% is.na %>% not %>% which %>% c(length(text) + 1)
extract <- function(x, pattern) str_extract(x, pattern) %>% str_split("=") %>% lapply(function(x) x[2] %>% as.numeric) %>% unlist
@@ -96,16 +96,16 @@ xgb.model.dt.tree <- function(feature_names = NULL, filename_dump = NULL, model
allTrees <- data.table()
- anynumber_regex<-"[-+]?[0-9]*\\.?[0-9]+([eE][-+]?[0-9]+)?"
- for(i in 1:n_round){
-
- tree <- text[(position[i]+1):(position[i+1]-1)]
-
+ anynumber_regex <- "[-+]?[0-9]*\\.?[0-9]+([eE][-+]?[0-9]+)?"
+ for (i in 1:n_round){
+
+ tree <- text[(position[i] + 1):(position[i + 1] - 1)]
+
# avoid tree made of a leaf only (no split)
- if(length(tree) <2) next
-
- treeID <- i-1
-
+ if(length(tree) < 2) next
+
+ treeID <- i - 1
+
notLeaf <- str_match(tree, "leaf") %>% is.na
leaf <- notLeaf %>% not %>% tree[.]
branch <- notLeaf %>% tree[.]
@@ -128,38 +128,38 @@ xgb.model.dt.tree <- function(feature_names = NULL, filename_dump = NULL, model
qualityLeaf <- extract(leaf, paste0("leaf=",anynumber_regex))
coverBranch <- extract(branch, "cover=\\d*\\.*\\d*")
coverLeaf <- extract(leaf, "cover=\\d*\\.*\\d*")
- dt <- data.table(ID = c(idBranch, idLeaf), Feature = c(featureBranch, featureLeaf), Split = c(splitBranch, splitLeaf), Yes = c(yesBranch, yesLeaf), No = c(noBranch, noLeaf), Missing = c(missingBranch, missingLeaf), Quality = c(qualityBranch, qualityLeaf), Cover = c(coverBranch, coverLeaf))[order(ID)][,Tree:=treeID]
-
+ dt <- data.table(ID = c(idBranch, idLeaf), Feature = c(featureBranch, featureLeaf), Split = c(splitBranch, splitLeaf), Yes = c(yesBranch, yesLeaf), No = c(noBranch, noLeaf), Missing = c(missingBranch, missingLeaf), Quality = c(qualityBranch, qualityLeaf), Cover = c(coverBranch, coverLeaf))[order(ID)][,Tree := treeID]
+
allTrees <- rbindlist(list(allTrees, dt), use.names = T, fill = F)
}
yes <- allTrees[!is.na(Yes), Yes]
-
- set(allTrees, i = which(allTrees[, Feature] != "Leaf"),
- j = "Yes.Feature",
+
+ set(allTrees, i = which(allTrees[, Feature] != "Leaf"),
+ j = "Yes.Feature",
value = allTrees[ID %in% yes, Feature])
-
+
set(allTrees, i = which(allTrees[, Feature] != "Leaf"),
- j = "Yes.Cover",
+ j = "Yes.Cover",
value = allTrees[ID %in% yes, Cover])
-
+
set(allTrees, i = which(allTrees[, Feature] != "Leaf"),
- j = "Yes.Quality",
+ j = "Yes.Quality",
value = allTrees[ID %in% yes, Quality])
no <- allTrees[!is.na(No), No]
-
+
set(allTrees, i = which(allTrees[, Feature] != "Leaf"),
- j = "No.Feature",
+ j = "No.Feature",
value = allTrees[ID %in% no, Feature])
-
+
set(allTrees, i = which(allTrees[, Feature] != "Leaf"),
- j = "No.Cover",
+ j = "No.Cover",
value = allTrees[ID %in% no, Cover])
-
- set(allTrees, i = which(allTrees[, Feature] != "Leaf"),
- j = "No.Quality",
+
+ set(allTrees, i = which(allTrees[, Feature] != "Leaf"),
+ j = "No.Quality",
value = allTrees[ID %in% no, Quality])
-
+
allTrees
}
diff --git a/R-package/R/xgb.plot.importance.R b/R-package/R/xgb.plot.importance.R
index f126dfe46..92399516d 100644
--- a/R-package/R/xgb.plot.importance.R
+++ b/R-package/R/xgb.plot.importance.R
@@ -30,7 +30,7 @@
#'
#' @export
xgb.plot.importance <- function(importance_matrix = NULL, numberOfClusters = c(1:10)){
- if (!"data.table" %in% class(importance_matrix)) {
+ if (!"data.table" %in% class(importance_matrix)) {
stop("importance_matrix: Should be a data.table.")
}
if (!requireNamespace("ggplot2", quietly = TRUE)) {
@@ -42,13 +42,13 @@ xgb.plot.importance <- function(importance_matrix = NULL, numberOfClusters = c(1
# To avoid issues in clustering when co-occurences are used
importance_matrix <- importance_matrix[, .(Gain = sum(Gain)), by = Feature]
-
+
clusters <- suppressWarnings(Ckmeans.1d.dp::Ckmeans.1d.dp(importance_matrix[,Gain], numberOfClusters))
- importance_matrix[,"Cluster":=clusters$cluster %>% as.character]
-
- plot <- ggplot2::ggplot(importance_matrix, ggplot2::aes(x=stats::reorder(Feature, Gain), y = Gain, width= 0.05), environment = environment())+ ggplot2::geom_bar(ggplot2::aes(fill=Cluster), stat="identity", position="identity") + ggplot2::coord_flip() + ggplot2::xlab("Features") + ggplot2::ylab("Gain") + ggplot2::ggtitle("Feature importance") + ggplot2::theme(plot.title = ggplot2::element_text(lineheight=.9, face="bold"), panel.grid.major.y = ggplot2::element_blank() )
-
- return(plot)
+ importance_matrix[,"Cluster" := clusters$cluster %>% as.character]
+
+ plot <- ggplot2::ggplot(importance_matrix, ggplot2::aes(x=stats::reorder(Feature, Gain), y = Gain, width = 0.05), environment = environment()) + ggplot2::geom_bar(ggplot2::aes(fill=Cluster), stat="identity", position="identity") + ggplot2::coord_flip() + ggplot2::xlab("Features") + ggplot2::ylab("Gain") + ggplot2::ggtitle("Feature importance") + ggplot2::theme(plot.title = ggplot2::element_text(lineheight=.9, face="bold"), panel.grid.major.y = ggplot2::element_blank() )
+
+ return(plot)
}
# Avoid error messages during CRAN check.
diff --git a/R-package/R/xgb.plot.tree.R b/R-package/R/xgb.plot.tree.R
index edcd5f47f..5e359219a 100644
--- a/R-package/R/xgb.plot.tree.R
+++ b/R-package/R/xgb.plot.tree.R
@@ -54,40 +54,39 @@
#'
#' @export
#'
-xgb.plot.tree <- function(feature_names = NULL, filename_dump = NULL, model = NULL, n_first_tree = NULL, CSSstyle = NULL, width = NULL, height = NULL){
-
+xgb.plot.tree <- function(feature_names = NULL, filename_dump = NULL, model = NULL, n_first_tree = NULL, CSSstyle = NULL, width = NULL, height = NULL){
+
if (!(class(CSSstyle) %in% c("character", "NULL") && length(CSSstyle) <= 1)) {
stop("style: Has to be a character vector of size 1.")
}
-
+
if (!class(model) %in% c("xgb.Booster", "NULL")) {
stop("model: Has to be an object of class xgb.Booster model generaged by the xgb.train function.")
}
-
+
if (!requireNamespace("DiagrammeR", quietly = TRUE)) {
stop("DiagrammeR package is required for xgb.plot.tree", call. = FALSE)
}
-
+
if(is.null(model)){
- allTrees <- xgb.model.dt.tree(feature_names = feature_names, filename_dump = filename_dump, n_first_tree = n_first_tree)
+ allTrees <- xgb.model.dt.tree(feature_names = feature_names, filename_dump = filename_dump, n_first_tree = n_first_tree)
} else {
- allTrees <- xgb.model.dt.tree(feature_names = feature_names, model = model, n_first_tree = n_first_tree)
+ allTrees <- xgb.model.dt.tree(feature_names = feature_names, model = model, n_first_tree = n_first_tree)
}
-
- allTrees[Feature!="Leaf" ,yesPath:= paste(ID,"(", Feature, "
Cover: ", Cover, "
Gain: ", Quality, ")-->|< ", Split, "|", Yes, ">", Yes.Feature, "]", sep = "")]
-
- allTrees[Feature!="Leaf" ,noPath:= paste(ID,"(", Feature, ")-->|>= ", Split, "|", No, ">", No.Feature, "]", sep = "")]
-
-
+
+ allTrees[Feature != "Leaf" ,yesPath := paste(ID,"(", Feature, "
Cover: ", Cover, "
Gain: ", Quality, ")-->|< ", Split, "|", Yes, ">", Yes.Feature, "]", sep = "")]
+
+ allTrees[Feature != "Leaf" ,noPath := paste(ID,"(", Feature, ")-->|>= ", Split, "|", No, ">", No.Feature, "]", sep = "")]
+
if(is.null(CSSstyle)){
- CSSstyle <- "classDef greenNode fill:#A2EB86, stroke:#04C4AB, stroke-width:2px;classDef redNode fill:#FFA070, stroke:#FF5E5E, stroke-width:2px"
- }
-
- yes <- allTrees[Feature!="Leaf", c(Yes)] %>% paste(collapse = ",") %>% paste("class ", ., " greenNode", sep = "")
-
- no <- allTrees[Feature!="Leaf", c(No)] %>% paste(collapse = ",") %>% paste("class ", ., " redNode", sep = "")
-
- path <- allTrees[Feature!="Leaf", c(yesPath, noPath)] %>% .[order(.)] %>% paste(sep = "", collapse = ";") %>% paste("graph LR", .,collapse = "", sep = ";") %>% paste(CSSstyle, yes, no, sep = ";")
+ CSSstyle <- "classDef greenNode fill:#A2EB86, stroke:#04C4AB, stroke-width:2px;classDef redNode fill:#FFA070, stroke:#FF5E5E, stroke-width:2px"
+ }
+
+ yes <- allTrees[Feature != "Leaf", c(Yes)] %>% paste(collapse = ",") %>% paste("class ", ., " greenNode", sep = "")
+
+ no <- allTrees[Feature != "Leaf", c(No)] %>% paste(collapse = ",") %>% paste("class ", ., " redNode", sep = "")
+
+ path <- allTrees[Feature != "Leaf", c(yesPath, noPath)] %>% .[order(.)] %>% paste(sep = "", collapse = ";") %>% paste("graph LR", .,collapse = "", sep = ";") %>% paste(CSSstyle, yes, no, sep = ";")
DiagrammeR::mermaid(path, width, height)
}
diff --git a/R-package/R/xgb.save.R b/R-package/R/xgb.save.R
index 2600b8cff..ad3cc8b12 100644
--- a/R-package/R/xgb.save.R
+++ b/R-package/R/xgb.save.R
@@ -29,4 +29,4 @@ xgb.save <- function(model, fname) {
stop("xgb.save: the input must be xgb.Booster. Use xgb.DMatrix.save to save
xgb.DMatrix object.")
return(FALSE)
-}
+}
diff --git a/R-package/R/xgb.train.R b/R-package/R/xgb.train.R
index b1d79d866..8e839af5c 100644
--- a/R-package/R/xgb.train.R
+++ b/R-package/R/xgb.train.R
@@ -120,9 +120,9 @@
#' bst <- xgb.train(param, dtrain, nthread = 2, nround = 2, watchlist)
#' @export
#'
-xgb.train <- function(params=list(), data, nrounds, watchlist = list(),
+xgb.train <- function(params=list(), data, nrounds, watchlist = list(),
obj = NULL, feval = NULL, verbose = 1, print.every.n=1L,
- early.stop.round = NULL, maximize = NULL,
+ early.stop.round = NULL, maximize = NULL,
save_period = 0, save_name = "xgboost.model", ...) {
dtrain <- data
if (typeof(params) != "list") {
@@ -139,30 +139,30 @@ xgb.train <- function(params=list(), data, nrounds, watchlist = list(),
if (length(watchlist) != 0 && verbose == 0) {
warning('watchlist is provided but verbose=0, no evaluation information will be printed')
}
-
- dot.params = list(...)
- nms.params = names(params)
- nms.dot.params = names(dot.params)
- if (length(intersect(nms.params,nms.dot.params))>0)
+
+ dot.params <- list(...)
+ nms.params <- names(params)
+ nms.dot.params <- names(dot.params)
+ if (length(intersect(nms.params,nms.dot.params)) > 0)
stop("Duplicated term in parameters. Please check your list of params.")
- params = append(params, dot.params)
-
+ params <- append(params, dot.params)
+
# customized objective and evaluation metric interface
if (!is.null(params$objective) && !is.null(obj))
stop("xgb.train: cannot assign two different objectives")
if (!is.null(params$objective))
- if (class(params$objective)=='function') {
- obj = params$objective
- params$objective = NULL
+ if (class(params$objective) == 'function') {
+ obj <- params$objective
+ params$objective <- NULL
}
if (!is.null(params$eval_metric) && !is.null(feval))
stop("xgb.train: cannot assign two different evaluation metrics")
if (!is.null(params$eval_metric))
- if (class(params$eval_metric)=='function') {
- feval = params$eval_metric
- params$eval_metric = NULL
+ if (class(params$eval_metric) == 'function') {
+ feval <- params$eval_metric
+ params$eval_metric <- NULL
}
-
+
# Early stopping
if (!is.null(early.stop.round)){
if (!is.null(feval) && is.null(maximize))
@@ -174,44 +174,43 @@ xgb.train <- function(params=list(), data, nrounds, watchlist = list(),
if (is.null(maximize))
{
if (params$eval_metric %in% c('rmse','logloss','error','merror','mlogloss')) {
- maximize = FALSE
+ maximize <- FALSE
} else {
- maximize = TRUE
+ maximize <- TRUE
}
}
-
+
if (maximize) {
- bestScore = 0
+ bestScore <- 0
} else {
- bestScore = Inf
+ bestScore <- Inf
}
- bestInd = 0
+ bestInd <- 0
earlyStopflag = FALSE
-
- if (length(watchlist)>1)
+
+ if (length(watchlist) > 1)
warning('Only the first data set in watchlist is used for early stopping process.')
}
-
-
+
handle <- xgb.Booster(params, append(watchlist, dtrain))
bst <- xgb.handleToBooster(handle)
- print.every.n=max( as.integer(print.every.n), 1L)
+ print.every.n <- max( as.integer(print.every.n), 1L)
for (i in 1:nrounds) {
succ <- xgb.iter.update(bst$handle, dtrain, i - 1, obj)
if (length(watchlist) != 0) {
msg <- xgb.iter.eval(bst$handle, watchlist, i - 1, feval)
- if (0== ( (i-1) %% print.every.n))
- cat(paste(msg, "\n", sep=""))
+ if (0 == ( (i - 1) %% print.every.n))
+ cat(paste(msg, "\n", sep = ""))
if (!is.null(early.stop.round))
{
- score = strsplit(msg,':|\\s+')[[1]][3]
- score = as.numeric(score)
- if ((maximize && score>bestScore) || (!maximize && score bestScore) || (!maximize && score < bestScore)) {
+ bestScore <- score
+ bestInd <- i
} else {
- if (i-bestInd>=early.stop.round) {
- earlyStopflag = TRUE
+ earlyStopflag = TRUE
+ if (i - bestInd >= early.stop.round) {
cat('Stopping. Best iteration:',bestInd)
break
}
@@ -226,8 +225,8 @@ xgb.train <- function(params=list(), data, nrounds, watchlist = list(),
}
bst <- xgb.Booster.check(bst)
if (!is.null(early.stop.round)) {
- bst$bestScore = bestScore
- bst$bestInd = bestInd
+ bst$bestScore <- bestScore
+ bst$bestInd <- bestInd
}
return(bst)
-}
+}
diff --git a/R-package/R/xgboost.R b/R-package/R/xgboost.R
index e11052add..122d2f492 100644
--- a/R-package/R/xgboost.R
+++ b/R-package/R/xgboost.R
@@ -59,28 +59,26 @@
#'
#' @export
#'
-xgboost <- function(data = NULL, label = NULL, missing = NA, weight = NULL,
- params = list(), nrounds,
+xgboost <- function(data = NULL, label = NULL, missing = NA, weight = NULL,
+ params = list(), nrounds,
verbose = 1, print.every.n = 1L, early.stop.round = NULL,
maximize = NULL, save_period = 0, save_name = "xgboost.model", ...) {
dtrain <- xgb.get.DMatrix(data, label, missing, weight)
-
+
params <- append(params, list(...))
-
+
if (verbose > 0) {
watchlist <- list(train = dtrain)
} else {
watchlist <- list()
}
-
+
bst <- xgb.train(params, dtrain, nrounds, watchlist, verbose = verbose, print.every.n=print.every.n,
early.stop.round = early.stop.round, maximize = maximize,
save_period = save_period, save_name = save_name)
-
+
return(bst)
-}
-
-
+}
#' Training part from Mushroom Data Set
#'
#' This data set is originally from the Mushroom data set,
diff --git a/R-package/tests/testthat/test_basic.R b/R-package/tests/testthat/test_basic.R
index 791f1246c..2e4e54902 100644
--- a/R-package/tests/testthat/test_basic.R
+++ b/R-package/tests/testthat/test_basic.R
@@ -4,30 +4,30 @@ context("basic functions")
data(agaricus.train, package='xgboost')
data(agaricus.test, package='xgboost')
-train = agaricus.train
-test = agaricus.test
+train <- agaricus.train
+test <- agaricus.test
test_that("train and predict", {
- bst = xgboost(data = train$data, label = train$label, max.depth = 2,
+ bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
eta = 1, nthread = 2, nround = 2, objective = "binary:logistic")
- pred = predict(bst, test$data)
+ pred <- predict(bst, test$data)
})
test_that("early stopping", {
- res = xgb.cv(data = train$data, label = train$label, max.depth = 2, nfold = 5,
+ res <- xgb.cv(data = train$data, label = train$label, max.depth = 2, nfold = 5,
eta = 0.3, nthread = 2, nround = 20, objective = "binary:logistic",
early.stop.round = 3, maximize = FALSE)
- expect_true(nrow(res)<20)
- bst = xgboost(data = train$data, label = train$label, max.depth = 2,
+ expect_true(nrow(res) < 20)
+ bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
eta = 0.3, nthread = 2, nround = 20, objective = "binary:logistic",
early.stop.round = 3, maximize = FALSE)
- pred = predict(bst, test$data)
+ pred <- predict(bst, test$data)
})
test_that("save_period", {
- bst = xgboost(data = train$data, label = train$label, max.depth = 2,
+ bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
eta = 0.3, nthread = 2, nround = 20, objective = "binary:logistic",
save_period = 10, save_name = "xgb.model")
- pred = predict(bst, test$data)
+ pred <- predict(bst, test$data)
})
diff --git a/R-package/tests/testthat/test_custom_objective.R b/R-package/tests/testthat/test_custom_objective.R
index 9fcbeca4d..3db595f49 100644
--- a/R-package/tests/testthat/test_custom_objective.R
+++ b/R-package/tests/testthat/test_custom_objective.R
@@ -7,40 +7,40 @@ test_that("custom objective works", {
data(agaricus.test, package='xgboost')
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)
-
+
watchlist <- list(eval = dtest, train = dtrain)
num_round <- 2
-
+
logregobj <- function(preds, dtrain) {
labels <- getinfo(dtrain, "label")
- preds <- 1/(1 + exp(-preds))
+ preds <- 1 / (1 + exp(-preds))
grad <- preds - labels
hess <- preds * (1 - preds)
return(list(grad = grad, hess = hess))
}
evalerror <- function(preds, dtrain) {
labels <- getinfo(dtrain, "label")
- err <- as.numeric(sum(labels != (preds > 0)))/length(labels)
+ err <- as.numeric(sum(labels != (preds > 0))) / length(labels)
return(list(metric = "error", value = err))
}
-
- param <- list(max.depth=2, eta=1, nthread = 2, silent=1,
+
+ param <- list(max.depth=2, eta=1, nthread = 2, silent=1,
objective=logregobj, eval_metric=evalerror)
-
+
bst <- xgb.train(param, dtrain, num_round, watchlist)
expect_equal(class(bst), "xgb.Booster")
expect_equal(length(bst$raw), 1064)
attr(dtrain, 'label') <- getinfo(dtrain, 'label')
-
+
logregobjattr <- function(preds, dtrain) {
labels <- attr(dtrain, 'label')
- preds <- 1/(1 + exp(-preds))
+ preds <- 1 / (1 + exp(-preds))
grad <- preds - labels
hess <- preds * (1 - preds)
return(list(grad = grad, hess = hess))
}
- param <- list(max.depth=2, eta=1, nthread = 2, silent=1,
- objective=logregobjattr, eval_metric=evalerror)
+ param <- list(max.depth=2, eta=1, nthread = 2, silent = 1,
+ objective = logregobjattr, eval_metric = evalerror)
bst <- xgb.train(param, dtrain, num_round, watchlist)
expect_equal(class(bst), "xgb.Booster")
expect_equal(length(bst$raw), 1064)
diff --git a/R-package/tests/testthat/test_helpers.R b/R-package/tests/testthat/test_helpers.R
index 4d80146e3..0ac6b388e 100644
--- a/R-package/tests/testthat/test_helpers.R
+++ b/R-package/tests/testthat/test_helpers.R
@@ -8,11 +8,11 @@ require(vcd)
data(Arthritis)
data(agaricus.train, package='xgboost')
df <- data.table(Arthritis, keep.rownames = F)
-df[,AgeDiscret:= as.factor(round(Age/10,0))]
-df[,AgeCat:= as.factor(ifelse(Age > 30, "Old", "Young"))]
-df[,ID:=NULL]
-sparse_matrix = sparse.model.matrix(Improved~.-1, data = df)
-output_vector = df[,Y:=0][Improved == "Marked",Y:=1][,Y]
+df[,AgeDiscret := as.factor(round(Age / 10,0))]
+df[,AgeCat := as.factor(ifelse(Age > 30, "Old", "Young"))]
+df[,ID := NULL]
+sparse_matrix <- sparse.model.matrix(Improved~.-1, data = df)
+output_vector <- df[,Y := 0][Improved == "Marked",Y := 1][,Y]
bst <- xgboost(data = sparse_matrix, label = output_vector, max.depth = 9,
eta = 1, nthread = 2, nround = 10,objective = "binary:logistic")
diff --git a/R-package/tests/testthat/test_poisson_regression.R b/R-package/tests/testthat/test_poisson_regression.R
index 5d3d78e27..c28820774 100644
--- a/R-package/tests/testthat/test_poisson_regression.R
+++ b/R-package/tests/testthat/test_poisson_regression.R
@@ -4,10 +4,10 @@ require(xgboost)
test_that("poisson regression works", {
data(mtcars)
- bst = xgboost(data=as.matrix(mtcars[,-11]),label=mtcars[,11],
- objective='count:poisson',nrounds=5)
+ bst <- xgboost(data = as.matrix(mtcars[,-11]),label = mtcars[,11],
+ objective = 'count:poisson', nrounds=5)
expect_equal(class(bst), "xgb.Booster")
- pred = predict(bst,as.matrix(mtcars[,-11]))
+ pred <- predict(bst,as.matrix(mtcars[, -11]))
expect_equal(length(pred), 32)
- sqrt(mean((pred-mtcars[,11])^2))
+ sqrt(mean( (pred - mtcars[,11]) ^ 2))
})
\ No newline at end of file