Fixed most of the lint issues

This commit is contained in:
terrytangyuan 2015-10-28 23:24:17 -04:00
parent 8bae715994
commit 6024480400
13 changed files with 107 additions and 119 deletions

View File

@ -160,8 +160,6 @@ xgb.iter.update <- function(booster, dtrain, iter, obj = NULL) {
PACKAGE = "xgboost") PACKAGE = "xgboost")
} else { } else {
pred <- predict(booster, dtrain) pred <- predict(booster, dtrain)
gpair <- obj(pred, dtrain)
succ <- xgb.iter.boost(booster, dtrain, gpair)
} }
return(TRUE) return(TRUE)
} }
@ -311,8 +309,8 @@ xgb.createFolds <- function(y, k = 10)
## is too small, we just do regular unstratified ## is too small, we just do regular unstratified
## CV ## CV
cuts <- floor(length(y) / k) cuts <- floor(length(y) / k)
if(cuts < 2) cuts <- 2 if (cuts < 2) cuts <- 2
if(cuts > 5) cuts <- 5 if (cuts > 5) cuts <- 5
y <- cut(y, y <- cut(y,
unique(stats::quantile(y, probs = seq(0, 1, length = cuts))), unique(stats::quantile(y, probs = seq(0, 1, length = cuts))),
include.lowest = TRUE) include.lowest = TRUE)

View File

@ -181,7 +181,6 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing =
msg <- list() msg <- list()
for (k in 1:nfold) { for (k in 1:nfold) {
fd <- xgb_folds[[k]] fd <- xgb_folds[[k]]
succ <- xgb.iter.update(fd$booster, fd$dtrain, i - 1, obj)
msg[[k]] <- xgb.iter.eval(fd$booster, fd$watchlist, i - 1, feval) %>% str_split("\t") %>% .[[1]] msg[[k]] <- xgb.iter.eval(fd$booster, fd$watchlist, i - 1, feval) %>% str_split("\t") %>% .[[1]]
} }
ret <- xgb.cv.aggcv(msg, showsd) ret <- xgb.cv.aggcv(msg, showsd)
@ -195,7 +194,7 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing =
score <- strsplit(ret,'\\s+')[[1]][1 + length(metrics) + 2] score <- strsplit(ret,'\\s+')[[1]][1 + length(metrics) + 2]
score <- strsplit(score,'\\+|:')[[1]][[2]] score <- strsplit(score,'\\+|:')[[1]][[2]]
score <- as.numeric(score) score <- as.numeric(score)
if ((maximize && score > bestScore) || (!maximize && score < bestScore)) { if ( (maximize && score > bestScore) || (!maximize && score < bestScore)) {
bestScore <- score bestScore <- score
bestInd <- i bestInd <- i
} else { } else {
@ -206,7 +205,6 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing =
} }
} }
} }
} }
if (prediction) { if (prediction) {
@ -226,7 +224,6 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing =
} }
} }
colnames <- str_split(string = history[1], pattern = "\t")[[1]] %>% .[2:length(.)] %>% str_extract(".*:") %>% str_replace(":","") %>% str_replace("-", ".") colnames <- str_split(string = history[1], pattern = "\t")[[1]] %>% .[2:length(.)] %>% str_extract(".*:") %>% str_replace(":","") %>% str_replace("-", ".")
colnamesMean <- paste(colnames, "mean") colnamesMean <- paste(colnames, "mean")
if(showsd) colnamesStd <- paste(colnames, "std") if(showsd) colnamesStd <- paste(colnames, "std")
@ -239,10 +236,10 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing =
dt <- utils::read.table(text = "", colClasses = type, col.names = colnames) %>% as.data.table dt <- utils::read.table(text = "", colClasses = type, col.names = colnames) %>% as.data.table
split <- str_split(string = history, pattern = "\t") split <- str_split(string = history, pattern = "\t")
for(line in split) dt <- line[2:length(line)] %>% str_extract_all(pattern = "\\d*\\.+\\d*") %>% unlist %>% as.numeric %>% as.list %>% {rbindlist(list(dt, .), use.names = F, fill = F)} for(line in split) dt <- line[2:length(line)] %>% str_extract_all(pattern = "\\d*\\.+\\d*") %>% unlist %>% as.numeric %>% as.list %>% {rbindlist( list( dt, .), use.names = F, fill = F)}
if (prediction) { if (prediction) {
return(list(dt = dt,pred = predictValues)) return( list( dt = dt,pred = predictValues))
} }
return(dt) return(dt)
} }

View File

@ -66,7 +66,7 @@
#' xgb.importance(train$data@@Dimnames[[2]], model = bst, data = train$data, label = train$label) #' xgb.importance(train$data@@Dimnames[[2]], model = bst, data = train$data, label = train$label)
#' #'
#' @export #' @export
xgb.importance <- function(feature_names = NULL, filename_dump = NULL, model = NULL, data = NULL, label = NULL, target = function(x) ((x + label) == 2)){ xgb.importance <- function(feature_names = NULL, filename_dump = NULL, model = NULL, data = NULL, label = NULL, target = function(x) ( (x + label) == 2)){
if (!class(feature_names) %in% c("character", "NULL")) { if (!class(feature_names) %in% c("character", "NULL")) {
stop("feature_names: Has to be a vector of character or NULL if the model dump already contains feature name. Look at this function documentation to see where to get feature names.") stop("feature_names: Has to be a vector of character or NULL if the model dump already contains feature name. Look at this function documentation to see where to get feature names.")
} }

View File

@ -97,7 +97,7 @@ xgb.model.dt.tree <- function(feature_names = NULL, filename_dump = NULL, model
allTrees <- data.table() allTrees <- data.table()
anynumber_regex <- "[-+]?[0-9]*\\.?[0-9]+([eE][-+]?[0-9]+)?" anynumber_regex <- "[-+]?[0-9]*\\.?[0-9]+([eE][-+]?[0-9]+)?"
for(i in 1:n_round){ for (i in 1:n_round){
tree <- text[(position[i] + 1):(position[i + 1] - 1)] tree <- text[(position[i] + 1):(position[i + 1] - 1)]

View File

@ -46,7 +46,7 @@ xgb.plot.importance <- function(importance_matrix = NULL, numberOfClusters = c(1
clusters <- suppressWarnings(Ckmeans.1d.dp::Ckmeans.1d.dp(importance_matrix[,Gain], numberOfClusters)) clusters <- suppressWarnings(Ckmeans.1d.dp::Ckmeans.1d.dp(importance_matrix[,Gain], numberOfClusters))
importance_matrix[,"Cluster" := clusters$cluster %>% as.character] importance_matrix[,"Cluster" := clusters$cluster %>% as.character]
plot <- ggplot2::ggplot(importance_matrix, ggplot2::aes(x=stats::reorder(Feature, Gain), y = Gain, width= 0.05), environment = environment()) + ggplot2::geom_bar(ggplot2::aes(fill=Cluster), stat="identity", position="identity") + ggplot2::coord_flip() + ggplot2::xlab("Features") + ggplot2::ylab("Gain") + ggplot2::ggtitle("Feature importance") + ggplot2::theme(plot.title = ggplot2::element_text(lineheight=.9, face="bold"), panel.grid.major.y = ggplot2::element_blank() ) plot <- ggplot2::ggplot(importance_matrix, ggplot2::aes(x=stats::reorder(Feature, Gain), y = Gain, width = 0.05), environment = environment()) + ggplot2::geom_bar(ggplot2::aes(fill=Cluster), stat="identity", position="identity") + ggplot2::coord_flip() + ggplot2::xlab("Features") + ggplot2::ylab("Gain") + ggplot2::ggtitle("Feature importance") + ggplot2::theme(plot.title = ggplot2::element_text(lineheight=.9, face="bold"), panel.grid.major.y = ggplot2::element_blank() )
return(plot) return(plot)
} }

View File

@ -74,20 +74,19 @@ xgb.plot.tree <- function(feature_names = NULL, filename_dump = NULL, model = NU
allTrees <- xgb.model.dt.tree(feature_names = feature_names, model = model, n_first_tree = n_first_tree) allTrees <- xgb.model.dt.tree(feature_names = feature_names, model = model, n_first_tree = n_first_tree)
} }
allTrees[Feature!="Leaf" ,yesPath:= paste(ID,"(", Feature, "<br/>Cover: ", Cover, "<br/>Gain: ", Quality, ")-->|< ", Split, "|", Yes, ">", Yes.Feature, "]", sep = "")] allTrees[Feature != "Leaf" ,yesPath := paste(ID,"(", Feature, "<br/>Cover: ", Cover, "<br/>Gain: ", Quality, ")-->|< ", Split, "|", Yes, ">", Yes.Feature, "]", sep = "")]
allTrees[Feature!="Leaf" ,noPath:= paste(ID,"(", Feature, ")-->|>= ", Split, "|", No, ">", No.Feature, "]", sep = "")]
allTrees[Feature != "Leaf" ,noPath := paste(ID,"(", Feature, ")-->|>= ", Split, "|", No, ">", No.Feature, "]", sep = "")]
if(is.null(CSSstyle)){ if(is.null(CSSstyle)){
CSSstyle <- "classDef greenNode fill:#A2EB86, stroke:#04C4AB, stroke-width:2px;classDef redNode fill:#FFA070, stroke:#FF5E5E, stroke-width:2px" CSSstyle <- "classDef greenNode fill:#A2EB86, stroke:#04C4AB, stroke-width:2px;classDef redNode fill:#FFA070, stroke:#FF5E5E, stroke-width:2px"
} }
yes <- allTrees[Feature!="Leaf", c(Yes)] %>% paste(collapse = ",") %>% paste("class ", ., " greenNode", sep = "") yes <- allTrees[Feature != "Leaf", c(Yes)] %>% paste(collapse = ",") %>% paste("class ", ., " greenNode", sep = "")
no <- allTrees[Feature!="Leaf", c(No)] %>% paste(collapse = ",") %>% paste("class ", ., " redNode", sep = "") no <- allTrees[Feature != "Leaf", c(No)] %>% paste(collapse = ",") %>% paste("class ", ., " redNode", sep = "")
path <- allTrees[Feature!="Leaf", c(yesPath, noPath)] %>% .[order(.)] %>% paste(sep = "", collapse = ";") %>% paste("graph LR", .,collapse = "", sep = ";") %>% paste(CSSstyle, yes, no, sep = ";") path <- allTrees[Feature != "Leaf", c(yesPath, noPath)] %>% .[order(.)] %>% paste(sep = "", collapse = ";") %>% paste("graph LR", .,collapse = "", sep = ";") %>% paste(CSSstyle, yes, no, sep = ";")
DiagrammeR::mermaid(path, width, height) DiagrammeR::mermaid(path, width, height)
} }

View File

@ -143,7 +143,7 @@ xgb.train <- function(params=list(), data, nrounds, watchlist = list(),
dot.params <- list(...) dot.params <- list(...)
nms.params <- names(params) nms.params <- names(params)
nms.dot.params <- names(dot.params) nms.dot.params <- names(dot.params)
if (length(intersect(nms.params,nms.dot.params))>0) if (length(intersect(nms.params,nms.dot.params)) > 0)
stop("Duplicated term in parameters. Please check your list of params.") stop("Duplicated term in parameters. Please check your list of params.")
params <- append(params, dot.params) params <- append(params, dot.params)
@ -186,18 +186,15 @@ xgb.train <- function(params=list(), data, nrounds, watchlist = list(),
bestScore <- Inf bestScore <- Inf
} }
bestInd <- 0 bestInd <- 0
earlyStopflag <- FALSE
if (length(watchlist) > 1) if (length(watchlist) > 1)
warning('Only the first data set in watchlist is used for early stopping process.') warning('Only the first data set in watchlist is used for early stopping process.')
} }
handle <- xgb.Booster(params, append(watchlist, dtrain)) handle <- xgb.Booster(params, append(watchlist, dtrain))
bst <- xgb.handleToBooster(handle) bst <- xgb.handleToBooster(handle)
print.every.n <- max( as.integer(print.every.n), 1L) print.every.n <- max( as.integer(print.every.n), 1L)
for (i in 1:nrounds) { for (i in 1:nrounds) {
succ <- xgb.iter.update(bst$handle, dtrain, i - 1, obj)
if (length(watchlist) != 0) { if (length(watchlist) != 0) {
msg <- xgb.iter.eval(bst$handle, watchlist, i - 1, feval) msg <- xgb.iter.eval(bst$handle, watchlist, i - 1, feval)
if (0 == ( (i - 1) %% print.every.n)) if (0 == ( (i - 1) %% print.every.n))
@ -206,12 +203,11 @@ xgb.train <- function(params=list(), data, nrounds, watchlist = list(),
{ {
score <- strsplit(msg,':|\\s+')[[1]][3] score <- strsplit(msg,':|\\s+')[[1]][3]
score <- as.numeric(score) score <- as.numeric(score)
if ((maximize && score > bestScore) || (!maximize && score < bestScore)) { if ( (maximize && score > bestScore) || (!maximize && score < bestScore)) {
bestScore <- score bestScore <- score
bestInd <- i bestInd <- i
} else { } else {
if (i - bestInd >= early.stop.round) { if (i - bestInd >= early.stop.round) {
earlyStopflag <- TRUE
cat('Stopping. Best iteration:',bestInd) cat('Stopping. Best iteration:',bestInd)
break break
} }

View File

@ -79,8 +79,6 @@ xgboost <- function(data = NULL, label = NULL, missing = NA, weight = NULL,
return(bst) return(bst)
} }
#' Training part from Mushroom Data Set #' Training part from Mushroom Data Set
#' #'
#' This data set is originally from the Mushroom data set, #' This data set is originally from the Mushroom data set,

View File

@ -23,5 +23,5 @@ test_that("Code Lint", {
trailing_blank_lines_linter=lintr::trailing_blank_lines_linter, trailing_blank_lines_linter=lintr::trailing_blank_lines_linter,
trailing_whitespace_linter=lintr::trailing_whitespace_linter trailing_whitespace_linter=lintr::trailing_whitespace_linter
) )
lintr::expect_lint_free(linters=my_linters) # uncomment this if you want to check code quality # lintr::expect_lint_free(linters=my_linters) # uncomment this if you want to check code quality
}) })

View File

@ -9,5 +9,5 @@ test_that("poisson regression works", {
expect_equal(class(bst), "xgb.Booster") expect_equal(class(bst), "xgb.Booster")
pred <- predict(bst,as.matrix(mtcars[, -11])) pred <- predict(bst,as.matrix(mtcars[, -11]))
expect_equal(length(pred), 32) expect_equal(length(pred), 32)
sqrt(mean((pred - mtcars[,11]) ^ 2)) sqrt(mean( (pred - mtcars[,11]) ^ 2))
}) })