Merge remote-tracking branch 'dmlc/master'

This commit is contained in:
El Potaeto
2015-08-05 12:07:41 +02:00
62 changed files with 1802 additions and 834 deletions

View File

@@ -288,7 +288,7 @@ xgb.cv.aggcv <- function(res, showsd = TRUE) {
}
ret <- paste(ret, sprintf("%f", mean(stats)), sep="")
if (showsd) {
ret <- paste(ret, sprintf("+%f", sd(stats)), sep="")
ret <- paste(ret, sprintf("+%f", stats::sd(stats)), sep="")
}
}
return (ret)
@@ -313,7 +313,7 @@ xgb.createFolds <- function(y, k = 10)
if(cuts < 2) cuts <- 2
if(cuts > 5) cuts <- 5
y <- cut(y,
unique(quantile(y, probs = seq(0, 1, length = cuts))),
unique(stats::quantile(y, probs = seq(0, 1, length = cuts))),
include.lowest = TRUE)
}

View File

@@ -240,7 +240,7 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing =
else colnames <- colnamesMean
type <- rep(x = "numeric", times = length(colnames))
dt <- read.table(text = "", colClasses = type, col.names = colnames) %>% as.data.table
dt <- utils::read.table(text = "", colClasses = type, col.names = colnames) %>% as.data.table
split <- str_split(string = history, pattern = "\t")
for(line in split) dt <- line[2:length(line)] %>% str_extract_all(pattern = "\\d*\\.+\\d*") %>% unlist %>% as.numeric %>% as.list %>% {rbindlist(list(dt, .), use.names = F, fill = F)}

View File

@@ -134,34 +134,33 @@ xgb.model.dt.tree <- function(feature_names = NULL, filename_dump = NULL, model
allTrees <- rbindlist(list(allTrees, dt), use.names = T, fill = F)
}
yes <- allTrees[!is.na(Yes),Yes]
set(allTrees, i = which(allTrees[,Feature]!= "Leaf"),
yes <- allTrees[!is.na(Yes), Yes]
set(allTrees, i = which(allTrees[, Feature] != "Leaf"),
j = "Yes.Feature",
value = allTrees[ID == yes,Feature])
set(allTrees, i = which(allTrees[,Feature]!= "Leaf"),
value = allTrees[ID %in% yes, Feature])
set(allTrees, i = which(allTrees[, Feature] != "Leaf"),
j = "Yes.Cover",
value = allTrees[ID == yes,Cover])
set(allTrees, i = which(allTrees[,Feature]!= "Leaf"),
j = "Yes.Quality",
value = allTrees[ID == yes,Quality])
value = allTrees[ID %in% yes, Cover])
no <- allTrees[!is.na(No),No]
set(allTrees, i = which(allTrees[, Feature] != "Leaf"),
j = "Yes.Quality",
value = allTrees[ID %in% yes, Quality])
no <- allTrees[!is.na(No), No]
set(allTrees, i = which(allTrees[,Feature]!= "Leaf"),
set(allTrees, i = which(allTrees[, Feature] != "Leaf"),
j = "No.Feature",
value = allTrees[ID == no,Feature])
value = allTrees[ID %in% no, Feature])
set(allTrees, i = which(allTrees[,Feature]!= "Leaf"),
set(allTrees, i = which(allTrees[, Feature] != "Leaf"),
j = "No.Cover",
value = allTrees[ID == no,Cover])
value = allTrees[ID %in% no, Cover])
set(allTrees, i = which(allTrees[,Feature]!= "Leaf"),
set(allTrees, i = which(allTrees[, Feature] != "Leaf"),
j = "No.Quality",
value = allTrees[ID == no,Quality])
value = allTrees[ID %in% no, Quality])
allTrees
}

View File

@@ -33,7 +33,7 @@ xgb.plot.importance <- function(importance_matrix = NULL, numberOfClusters = c(1
if (!"data.table" %in% class(importance_matrix)) {
stop("importance_matrix: Should be a data.table.")
}
if (!require(ggplot2, quietly = TRUE)) {
if (!requireNamespace("ggplot2", quietly = TRUE)) {
stop("ggplot2 package is required for plotting the importance", call. = FALSE)
}
if (!requireNamespace("Ckmeans.1d.dp", quietly = TRUE)) {
@@ -46,7 +46,7 @@ xgb.plot.importance <- function(importance_matrix = NULL, numberOfClusters = c(1
clusters <- suppressWarnings(Ckmeans.1d.dp::Ckmeans.1d.dp(importance_matrix[,Gain], numberOfClusters))
importance_matrix[,"Cluster":=clusters$cluster %>% as.character]
plot <- ggplot(importance_matrix, aes(x=reorder(Feature, Gain), y = Gain, width= 0.05), environment = environment())+ geom_bar(aes(fill=Cluster), stat="identity", position="identity") + coord_flip() + xlab("Features") + ylab("Gain") + ggtitle("Feature importance") + theme(plot.title = element_text(lineheight=.9, face="bold"), panel.grid.major.y = element_blank() )
plot <- ggplot2::ggplot(importance_matrix, ggplot2::aes(x=stats::reorder(Feature, Gain), y = Gain, width= 0.05), environment = environment())+ ggplot2::geom_bar(ggplot2::aes(fill=Cluster), stat="identity", position="identity") + ggplot2::coord_flip() + ggplot2::xlab("Features") + ggplot2::ylab("Gain") + ggplot2::ggtitle("Feature importance") + ggplot2::theme(plot.title = ggplot2::element_text(lineheight=.9, face="bold"), panel.grid.major.y = ggplot2::element_blank() )
return(plot)
}