Fixed most of the lint issues
This commit is contained in:
parent
8bae715994
commit
6024480400
@ -30,7 +30,7 @@ setMethod("slice", signature = "xgb.DMatrix",
|
|||||||
}
|
}
|
||||||
ret <- .Call("XGDMatrixSliceDMatrix_R", object, idxset,
|
ret <- .Call("XGDMatrixSliceDMatrix_R", object, idxset,
|
||||||
PACKAGE = "xgboost")
|
PACKAGE = "xgboost")
|
||||||
|
|
||||||
attr_list <- attributes(object)
|
attr_list <- attributes(object)
|
||||||
nr <- xgb.numrow(object)
|
nr <- xgb.numrow(object)
|
||||||
len <- sapply(attr_list,length)
|
len <- sapply(attr_list,length)
|
||||||
|
|||||||
@ -68,7 +68,7 @@ xgb.Booster <- function(params = list(), cachelist = list(), modelfile = NULL) {
|
|||||||
if (typeof(modelfile) == "character") {
|
if (typeof(modelfile) == "character") {
|
||||||
.Call("XGBoosterLoadModel_R", handle, modelfile, PACKAGE = "xgboost")
|
.Call("XGBoosterLoadModel_R", handle, modelfile, PACKAGE = "xgboost")
|
||||||
} else if (typeof(modelfile) == "raw") {
|
} else if (typeof(modelfile) == "raw") {
|
||||||
.Call("XGBoosterLoadModelFromRaw_R", handle, modelfile, PACKAGE = "xgboost")
|
.Call("XGBoosterLoadModelFromRaw_R", handle, modelfile, PACKAGE = "xgboost")
|
||||||
} else {
|
} else {
|
||||||
stop("xgb.Booster: modelfile must be character or raw vector")
|
stop("xgb.Booster: modelfile must be character or raw vector")
|
||||||
}
|
}
|
||||||
@ -122,7 +122,7 @@ xgb.get.DMatrix <- function(data, label = NULL, missing = NA, weight = NULL) {
|
|||||||
} else if (inClass == "xgb.DMatrix") {
|
} else if (inClass == "xgb.DMatrix") {
|
||||||
dtrain <- data
|
dtrain <- data
|
||||||
} else if (inClass == "data.frame") {
|
} else if (inClass == "data.frame") {
|
||||||
stop("xgboost only support numerical matrix input,
|
stop("xgboost only support numerical matrix input,
|
||||||
use 'data.frame' to transform the data.")
|
use 'data.frame' to transform the data.")
|
||||||
} else {
|
} else {
|
||||||
stop("xgboost: Invalid input of data")
|
stop("xgboost: Invalid input of data")
|
||||||
@ -156,12 +156,10 @@ xgb.iter.update <- function(booster, dtrain, iter, obj = NULL) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (is.null(obj)) {
|
if (is.null(obj)) {
|
||||||
.Call("XGBoosterUpdateOneIter_R", booster, as.integer(iter), dtrain,
|
.Call("XGBoosterUpdateOneIter_R", booster, as.integer(iter), dtrain,
|
||||||
PACKAGE = "xgboost")
|
PACKAGE = "xgboost")
|
||||||
} else {
|
} else {
|
||||||
pred <- predict(booster, dtrain)
|
pred <- predict(booster, dtrain)
|
||||||
gpair <- obj(pred, dtrain)
|
|
||||||
succ <- xgb.iter.boost(booster, dtrain, gpair)
|
|
||||||
}
|
}
|
||||||
return(TRUE)
|
return(TRUE)
|
||||||
}
|
}
|
||||||
@ -189,9 +187,9 @@ xgb.iter.eval <- function(booster, watchlist, iter, feval = NULL, prediction = F
|
|||||||
}
|
}
|
||||||
evnames <- append(evnames, names(w))
|
evnames <- append(evnames, names(w))
|
||||||
}
|
}
|
||||||
msg <- .Call("XGBoosterEvalOneIter_R", booster, as.integer(iter), watchlist,
|
msg <- .Call("XGBoosterEvalOneIter_R", booster, as.integer(iter), watchlist,
|
||||||
evnames, PACKAGE = "xgboost")
|
evnames, PACKAGE = "xgboost")
|
||||||
} else {
|
} else {
|
||||||
msg <- paste("[", iter, "]", sep="")
|
msg <- paste("[", iter, "]", sep="")
|
||||||
for (j in 1:length(watchlist)) {
|
for (j in 1:length(watchlist)) {
|
||||||
w <- watchlist[j]
|
w <- watchlist[j]
|
||||||
@ -247,7 +245,7 @@ xgb.cv.mknfold <- function(dall, nfold, param, stratified, folds) {
|
|||||||
if (length(unique(y)) <= 5) y <- factor(y)
|
if (length(unique(y)) <= 5) y <- factor(y)
|
||||||
}
|
}
|
||||||
folds <- xgb.createFolds(y, nfold)
|
folds <- xgb.createFolds(y, nfold)
|
||||||
} else {
|
} else {
|
||||||
# make simple non-stratified folds
|
# make simple non-stratified folds
|
||||||
kstep <- length(randidx) %/% nfold
|
kstep <- length(randidx) %/% nfold
|
||||||
folds <- list()
|
folds <- list()
|
||||||
@ -282,7 +280,7 @@ xgb.cv.aggcv <- function(res, showsd = TRUE) {
|
|||||||
kv <- strsplit(header[i], ":")[[1]]
|
kv <- strsplit(header[i], ":")[[1]]
|
||||||
ret <- paste(ret, "\t", kv[1], ":", sep="")
|
ret <- paste(ret, "\t", kv[1], ":", sep="")
|
||||||
stats <- c()
|
stats <- c()
|
||||||
stats[1] <- as.numeric(kv[2])
|
stats[1] <- as.numeric(kv[2])
|
||||||
for (j in 2:length(res)) {
|
for (j in 2:length(res)) {
|
||||||
tkv <- strsplit(res[[j]][i], ":")[[1]]
|
tkv <- strsplit(res[[j]][i], ":")[[1]]
|
||||||
stats[j] <- as.numeric(tkv[2])
|
stats[j] <- as.numeric(tkv[2])
|
||||||
@ -311,8 +309,8 @@ xgb.createFolds <- function(y, k = 10)
|
|||||||
## is too small, we just do regular unstratified
|
## is too small, we just do regular unstratified
|
||||||
## CV
|
## CV
|
||||||
cuts <- floor(length(y) / k)
|
cuts <- floor(length(y) / k)
|
||||||
if(cuts < 2) cuts <- 2
|
if (cuts < 2) cuts <- 2
|
||||||
if(cuts > 5) cuts <- 5
|
if (cuts > 5) cuts <- 5
|
||||||
y <- cut(y,
|
y <- cut(y,
|
||||||
unique(stats::quantile(y, probs = seq(0, 1, length = cuts))),
|
unique(stats::quantile(y, probs = seq(0, 1, length = cuts))),
|
||||||
include.lowest = TRUE)
|
include.lowest = TRUE)
|
||||||
@ -324,7 +322,7 @@ xgb.createFolds <- function(y, k = 10)
|
|||||||
y <- factor(as.character(y))
|
y <- factor(as.character(y))
|
||||||
numInClass <- table(y)
|
numInClass <- table(y)
|
||||||
foldVector <- vector(mode = "integer", length(y))
|
foldVector <- vector(mode = "integer", length(y))
|
||||||
|
|
||||||
## For each class, balance the fold allocation as far
|
## For each class, balance the fold allocation as far
|
||||||
## as possible, then resample the remainder.
|
## as possible, then resample the remainder.
|
||||||
## The final assignment of folds is also randomized.
|
## The final assignment of folds is also randomized.
|
||||||
|
|||||||
@ -118,7 +118,7 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing =
|
|||||||
for (mc in metrics) {
|
for (mc in metrics) {
|
||||||
params <- append(params, list("eval_metric"=mc))
|
params <- append(params, list("eval_metric"=mc))
|
||||||
}
|
}
|
||||||
|
|
||||||
# customized objective and evaluation metric interface
|
# customized objective and evaluation metric interface
|
||||||
if (!is.null(params$objective) && !is.null(obj))
|
if (!is.null(params$objective) && !is.null(obj))
|
||||||
stop("xgb.cv: cannot assign two different objectives")
|
stop("xgb.cv: cannot assign two different objectives")
|
||||||
@ -134,7 +134,7 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing =
|
|||||||
feval <- params$eval_metric
|
feval <- params$eval_metric
|
||||||
params[['eval_metric']] <- NULL
|
params[['eval_metric']] <- NULL
|
||||||
}
|
}
|
||||||
|
|
||||||
# Early Stopping
|
# Early Stopping
|
||||||
if (!is.null(early.stop.round)){
|
if (!is.null(early.stop.round)){
|
||||||
if (!is.null(feval) && is.null(maximize))
|
if (!is.null(feval) && is.null(maximize))
|
||||||
@ -149,7 +149,7 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing =
|
|||||||
maximize <- TRUE
|
maximize <- TRUE
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (maximize) {
|
if (maximize) {
|
||||||
bestScore <- 0
|
bestScore <- 0
|
||||||
} else {
|
} else {
|
||||||
@ -157,11 +157,11 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing =
|
|||||||
}
|
}
|
||||||
bestInd <- 0
|
bestInd <- 0
|
||||||
earlyStopflag <- FALSE
|
earlyStopflag <- FALSE
|
||||||
|
|
||||||
if (length(metrics) > 1)
|
if (length(metrics) > 1)
|
||||||
warning('Only the first metric is used for early stopping process.')
|
warning('Only the first metric is used for early stopping process.')
|
||||||
}
|
}
|
||||||
|
|
||||||
xgb_folds <- xgb.cv.mknfold(dtrain, nfold, params, stratified, folds)
|
xgb_folds <- xgb.cv.mknfold(dtrain, nfold, params, stratified, folds)
|
||||||
obj_type <- params[['objective']]
|
obj_type <- params[['objective']]
|
||||||
mat_pred <- FALSE
|
mat_pred <- FALSE
|
||||||
@ -181,7 +181,6 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing =
|
|||||||
msg <- list()
|
msg <- list()
|
||||||
for (k in 1:nfold) {
|
for (k in 1:nfold) {
|
||||||
fd <- xgb_folds[[k]]
|
fd <- xgb_folds[[k]]
|
||||||
succ <- xgb.iter.update(fd$booster, fd$dtrain, i - 1, obj)
|
|
||||||
msg[[k]] <- xgb.iter.eval(fd$booster, fd$watchlist, i - 1, feval) %>% str_split("\t") %>% .[[1]]
|
msg[[k]] <- xgb.iter.eval(fd$booster, fd$watchlist, i - 1, feval) %>% str_split("\t") %>% .[[1]]
|
||||||
}
|
}
|
||||||
ret <- xgb.cv.aggcv(msg, showsd)
|
ret <- xgb.cv.aggcv(msg, showsd)
|
||||||
@ -189,13 +188,13 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing =
|
|||||||
if(verbose)
|
if(verbose)
|
||||||
if (0 == (i - 1L) %% print.every.n)
|
if (0 == (i - 1L) %% print.every.n)
|
||||||
cat(ret, "\n", sep="")
|
cat(ret, "\n", sep="")
|
||||||
|
|
||||||
# early_Stopping
|
# early_Stopping
|
||||||
if (!is.null(early.stop.round)){
|
if (!is.null(early.stop.round)){
|
||||||
score <- strsplit(ret,'\\s+')[[1]][1 + length(metrics) + 2]
|
score <- strsplit(ret,'\\s+')[[1]][1 + length(metrics) + 2]
|
||||||
score <- strsplit(score,'\\+|:')[[1]][[2]]
|
score <- strsplit(score,'\\+|:')[[1]][[2]]
|
||||||
score <- as.numeric(score)
|
score <- as.numeric(score)
|
||||||
if ((maximize && score > bestScore) || (!maximize && score < bestScore)) {
|
if ( (maximize && score > bestScore) || (!maximize && score < bestScore)) {
|
||||||
bestScore <- score
|
bestScore <- score
|
||||||
bestInd <- i
|
bestInd <- i
|
||||||
} else {
|
} else {
|
||||||
@ -206,9 +205,8 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing =
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (prediction) {
|
if (prediction) {
|
||||||
for (k in 1:nfold) {
|
for (k in 1:nfold) {
|
||||||
fd <- xgb_folds[[k]]
|
fd <- xgb_folds[[k]]
|
||||||
@ -225,24 +223,23 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing =
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
colnames <- str_split(string = history[1], pattern = "\t")[[1]] %>% .[2:length(.)] %>% str_extract(".*:") %>% str_replace(":","") %>% str_replace("-", ".")
|
colnames <- str_split(string = history[1], pattern = "\t")[[1]] %>% .[2:length(.)] %>% str_extract(".*:") %>% str_replace(":","") %>% str_replace("-", ".")
|
||||||
colnamesMean <- paste(colnames, "mean")
|
colnamesMean <- paste(colnames, "mean")
|
||||||
if(showsd) colnamesStd <- paste(colnames, "std")
|
if(showsd) colnamesStd <- paste(colnames, "std")
|
||||||
|
|
||||||
colnames <- c()
|
colnames <- c()
|
||||||
if(showsd) for(i in 1:length(colnamesMean)) colnames <- c(colnames, colnamesMean[i], colnamesStd[i])
|
if(showsd) for(i in 1:length(colnamesMean)) colnames <- c(colnames, colnamesMean[i], colnamesStd[i])
|
||||||
else colnames <- colnamesMean
|
else colnames <- colnamesMean
|
||||||
|
|
||||||
type <- rep(x = "numeric", times = length(colnames))
|
type <- rep(x = "numeric", times = length(colnames))
|
||||||
dt <- utils::read.table(text = "", colClasses = type, col.names = colnames) %>% as.data.table
|
dt <- utils::read.table(text = "", colClasses = type, col.names = colnames) %>% as.data.table
|
||||||
split <- str_split(string = history, pattern = "\t")
|
split <- str_split(string = history, pattern = "\t")
|
||||||
|
|
||||||
for(line in split) dt <- line[2:length(line)] %>% str_extract_all(pattern = "\\d*\\.+\\d*") %>% unlist %>% as.numeric %>% as.list %>% {rbindlist(list(dt, .), use.names = F, fill = F)}
|
for(line in split) dt <- line[2:length(line)] %>% str_extract_all(pattern = "\\d*\\.+\\d*") %>% unlist %>% as.numeric %>% as.list %>% {rbindlist( list( dt, .), use.names = F, fill = F)}
|
||||||
|
|
||||||
if (prediction) {
|
if (prediction) {
|
||||||
return(list(dt = dt,pred = predictValues))
|
return( list( dt = dt,pred = predictValues))
|
||||||
}
|
}
|
||||||
return(dt)
|
return(dt)
|
||||||
}
|
}
|
||||||
|
|||||||
@ -66,8 +66,8 @@
|
|||||||
#' xgb.importance(train$data@@Dimnames[[2]], model = bst, data = train$data, label = train$label)
|
#' xgb.importance(train$data@@Dimnames[[2]], model = bst, data = train$data, label = train$label)
|
||||||
#'
|
#'
|
||||||
#' @export
|
#' @export
|
||||||
xgb.importance <- function(feature_names = NULL, filename_dump = NULL, model = NULL, data = NULL, label = NULL, target = function(x) ((x + label) == 2)){
|
xgb.importance <- function(feature_names = NULL, filename_dump = NULL, model = NULL, data = NULL, label = NULL, target = function(x) ( (x + label) == 2)){
|
||||||
if (!class(feature_names) %in% c("character", "NULL")) {
|
if (!class(feature_names) %in% c("character", "NULL")) {
|
||||||
stop("feature_names: Has to be a vector of character or NULL if the model dump already contains feature name. Look at this function documentation to see where to get feature names.")
|
stop("feature_names: Has to be a vector of character or NULL if the model dump already contains feature name. Look at this function documentation to see where to get feature names.")
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -98,7 +98,7 @@ xgb.importance <- function(feature_names = NULL, filename_dump = NULL, model = N
|
|||||||
if(!is.null(data) | !is.null(label)) warning("data/label: these parameters should only be provided with decision tree based models.")
|
if(!is.null(data) | !is.null(label)) warning("data/label: these parameters should only be provided with decision tree based models.")
|
||||||
} else {
|
} else {
|
||||||
result <- treeDump(feature_names, text = text, keepDetail = !is.null(data))
|
result <- treeDump(feature_names, text = text, keepDetail = !is.null(data))
|
||||||
|
|
||||||
# Co-occurence computation
|
# Co-occurence computation
|
||||||
if(!is.null(data) & !is.null(label) & nrow(result) > 0) {
|
if(!is.null(data) & !is.null(label) & nrow(result) > 0) {
|
||||||
# Take care of missing column
|
# Take care of missing column
|
||||||
@ -109,9 +109,9 @@ xgb.importance <- function(feature_names = NULL, filename_dump = NULL, model = N
|
|||||||
# Apply split
|
# Apply split
|
||||||
d <- data[, result[,Feature], drop=FALSE] < as.numeric(result[,Split])
|
d <- data[, result[,Feature], drop=FALSE] < as.numeric(result[,Split])
|
||||||
apply(c & d, 2, . %>% target %>% sum) -> vec
|
apply(c & d, 2, . %>% target %>% sum) -> vec
|
||||||
|
|
||||||
result <- result[, "RealCover" := as.numeric(vec), with = F][, "RealCover %" := RealCover / sum(label)][,MissingNo := NULL]
|
result <- result[, "RealCover" := as.numeric(vec), with = F][, "RealCover %" := RealCover / sum(label)][,MissingNo := NULL]
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
result
|
result
|
||||||
}
|
}
|
||||||
|
|||||||
@ -57,7 +57,7 @@
|
|||||||
#' @export
|
#' @export
|
||||||
xgb.model.dt.tree <- function(feature_names = NULL, filename_dump = NULL, model = NULL, text = NULL, n_first_tree = NULL){
|
xgb.model.dt.tree <- function(feature_names = NULL, filename_dump = NULL, model = NULL, text = NULL, n_first_tree = NULL){
|
||||||
|
|
||||||
if (!class(feature_names) %in% c("character", "NULL")) {
|
if (!class(feature_names) %in% c("character", "NULL")) {
|
||||||
stop("feature_names: Has to be a vector of character or NULL if the model dump already contains feature name. Look at this function documentation to see where to get feature names.")
|
stop("feature_names: Has to be a vector of character or NULL if the model dump already contains feature name. Look at this function documentation to see where to get feature names.")
|
||||||
}
|
}
|
||||||
if (!(class(filename_dump) %in% c("character", "NULL") && length(filename_dump) <= 1)) {
|
if (!(class(filename_dump) %in% c("character", "NULL") && length(filename_dump) <= 1)) {
|
||||||
@ -97,15 +97,15 @@ xgb.model.dt.tree <- function(feature_names = NULL, filename_dump = NULL, model
|
|||||||
allTrees <- data.table()
|
allTrees <- data.table()
|
||||||
|
|
||||||
anynumber_regex <- "[-+]?[0-9]*\\.?[0-9]+([eE][-+]?[0-9]+)?"
|
anynumber_regex <- "[-+]?[0-9]*\\.?[0-9]+([eE][-+]?[0-9]+)?"
|
||||||
for(i in 1:n_round){
|
for (i in 1:n_round){
|
||||||
|
|
||||||
tree <- text[(position[i] + 1):(position[i + 1] - 1)]
|
tree <- text[(position[i] + 1):(position[i + 1] - 1)]
|
||||||
|
|
||||||
# avoid tree made of a leaf only (no split)
|
# avoid tree made of a leaf only (no split)
|
||||||
if(length(tree) < 2) next
|
if(length(tree) < 2) next
|
||||||
|
|
||||||
treeID <- i - 1
|
treeID <- i - 1
|
||||||
|
|
||||||
notLeaf <- str_match(tree, "leaf") %>% is.na
|
notLeaf <- str_match(tree, "leaf") %>% is.na
|
||||||
leaf <- notLeaf %>% not %>% tree[.]
|
leaf <- notLeaf %>% not %>% tree[.]
|
||||||
branch <- notLeaf %>% tree[.]
|
branch <- notLeaf %>% tree[.]
|
||||||
@ -129,37 +129,37 @@ xgb.model.dt.tree <- function(feature_names = NULL, filename_dump = NULL, model
|
|||||||
coverBranch <- extract(branch, "cover=\\d*\\.*\\d*")
|
coverBranch <- extract(branch, "cover=\\d*\\.*\\d*")
|
||||||
coverLeaf <- extract(leaf, "cover=\\d*\\.*\\d*")
|
coverLeaf <- extract(leaf, "cover=\\d*\\.*\\d*")
|
||||||
dt <- data.table(ID = c(idBranch, idLeaf), Feature = c(featureBranch, featureLeaf), Split = c(splitBranch, splitLeaf), Yes = c(yesBranch, yesLeaf), No = c(noBranch, noLeaf), Missing = c(missingBranch, missingLeaf), Quality = c(qualityBranch, qualityLeaf), Cover = c(coverBranch, coverLeaf))[order(ID)][,Tree := treeID]
|
dt <- data.table(ID = c(idBranch, idLeaf), Feature = c(featureBranch, featureLeaf), Split = c(splitBranch, splitLeaf), Yes = c(yesBranch, yesLeaf), No = c(noBranch, noLeaf), Missing = c(missingBranch, missingLeaf), Quality = c(qualityBranch, qualityLeaf), Cover = c(coverBranch, coverLeaf))[order(ID)][,Tree := treeID]
|
||||||
|
|
||||||
allTrees <- rbindlist(list(allTrees, dt), use.names = T, fill = F)
|
allTrees <- rbindlist(list(allTrees, dt), use.names = T, fill = F)
|
||||||
}
|
}
|
||||||
|
|
||||||
yes <- allTrees[!is.na(Yes), Yes]
|
yes <- allTrees[!is.na(Yes), Yes]
|
||||||
|
|
||||||
set(allTrees, i = which(allTrees[, Feature] != "Leaf"),
|
set(allTrees, i = which(allTrees[, Feature] != "Leaf"),
|
||||||
j = "Yes.Feature",
|
j = "Yes.Feature",
|
||||||
value = allTrees[ID %in% yes, Feature])
|
value = allTrees[ID %in% yes, Feature])
|
||||||
|
|
||||||
set(allTrees, i = which(allTrees[, Feature] != "Leaf"),
|
set(allTrees, i = which(allTrees[, Feature] != "Leaf"),
|
||||||
j = "Yes.Cover",
|
j = "Yes.Cover",
|
||||||
value = allTrees[ID %in% yes, Cover])
|
value = allTrees[ID %in% yes, Cover])
|
||||||
|
|
||||||
set(allTrees, i = which(allTrees[, Feature] != "Leaf"),
|
set(allTrees, i = which(allTrees[, Feature] != "Leaf"),
|
||||||
j = "Yes.Quality",
|
j = "Yes.Quality",
|
||||||
value = allTrees[ID %in% yes, Quality])
|
value = allTrees[ID %in% yes, Quality])
|
||||||
no <- allTrees[!is.na(No), No]
|
no <- allTrees[!is.na(No), No]
|
||||||
|
|
||||||
set(allTrees, i = which(allTrees[, Feature] != "Leaf"),
|
set(allTrees, i = which(allTrees[, Feature] != "Leaf"),
|
||||||
j = "No.Feature",
|
j = "No.Feature",
|
||||||
value = allTrees[ID %in% no, Feature])
|
value = allTrees[ID %in% no, Feature])
|
||||||
|
|
||||||
set(allTrees, i = which(allTrees[, Feature] != "Leaf"),
|
set(allTrees, i = which(allTrees[, Feature] != "Leaf"),
|
||||||
j = "No.Cover",
|
j = "No.Cover",
|
||||||
value = allTrees[ID %in% no, Cover])
|
value = allTrees[ID %in% no, Cover])
|
||||||
|
|
||||||
set(allTrees, i = which(allTrees[, Feature] != "Leaf"),
|
set(allTrees, i = which(allTrees[, Feature] != "Leaf"),
|
||||||
j = "No.Quality",
|
j = "No.Quality",
|
||||||
value = allTrees[ID %in% no, Quality])
|
value = allTrees[ID %in% no, Quality])
|
||||||
|
|
||||||
allTrees
|
allTrees
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -30,7 +30,7 @@
|
|||||||
#'
|
#'
|
||||||
#' @export
|
#' @export
|
||||||
xgb.plot.importance <- function(importance_matrix = NULL, numberOfClusters = c(1:10)){
|
xgb.plot.importance <- function(importance_matrix = NULL, numberOfClusters = c(1:10)){
|
||||||
if (!"data.table" %in% class(importance_matrix)) {
|
if (!"data.table" %in% class(importance_matrix)) {
|
||||||
stop("importance_matrix: Should be a data.table.")
|
stop("importance_matrix: Should be a data.table.")
|
||||||
}
|
}
|
||||||
if (!requireNamespace("ggplot2", quietly = TRUE)) {
|
if (!requireNamespace("ggplot2", quietly = TRUE)) {
|
||||||
@ -42,13 +42,13 @@ xgb.plot.importance <- function(importance_matrix = NULL, numberOfClusters = c(1
|
|||||||
|
|
||||||
# To avoid issues in clustering when co-occurences are used
|
# To avoid issues in clustering when co-occurences are used
|
||||||
importance_matrix <- importance_matrix[, .(Gain = sum(Gain)), by = Feature]
|
importance_matrix <- importance_matrix[, .(Gain = sum(Gain)), by = Feature]
|
||||||
|
|
||||||
clusters <- suppressWarnings(Ckmeans.1d.dp::Ckmeans.1d.dp(importance_matrix[,Gain], numberOfClusters))
|
clusters <- suppressWarnings(Ckmeans.1d.dp::Ckmeans.1d.dp(importance_matrix[,Gain], numberOfClusters))
|
||||||
importance_matrix[,"Cluster" := clusters$cluster %>% as.character]
|
importance_matrix[,"Cluster" := clusters$cluster %>% as.character]
|
||||||
|
|
||||||
plot <- ggplot2::ggplot(importance_matrix, ggplot2::aes(x=stats::reorder(Feature, Gain), y = Gain, width= 0.05), environment = environment()) + ggplot2::geom_bar(ggplot2::aes(fill=Cluster), stat="identity", position="identity") + ggplot2::coord_flip() + ggplot2::xlab("Features") + ggplot2::ylab("Gain") + ggplot2::ggtitle("Feature importance") + ggplot2::theme(plot.title = ggplot2::element_text(lineheight=.9, face="bold"), panel.grid.major.y = ggplot2::element_blank() )
|
plot <- ggplot2::ggplot(importance_matrix, ggplot2::aes(x=stats::reorder(Feature, Gain), y = Gain, width = 0.05), environment = environment()) + ggplot2::geom_bar(ggplot2::aes(fill=Cluster), stat="identity", position="identity") + ggplot2::coord_flip() + ggplot2::xlab("Features") + ggplot2::ylab("Gain") + ggplot2::ggtitle("Feature importance") + ggplot2::theme(plot.title = ggplot2::element_text(lineheight=.9, face="bold"), panel.grid.major.y = ggplot2::element_blank() )
|
||||||
|
|
||||||
return(plot)
|
return(plot)
|
||||||
}
|
}
|
||||||
|
|
||||||
# Avoid error messages during CRAN check.
|
# Avoid error messages during CRAN check.
|
||||||
|
|||||||
@ -54,40 +54,39 @@
|
|||||||
#'
|
#'
|
||||||
#' @export
|
#' @export
|
||||||
#'
|
#'
|
||||||
xgb.plot.tree <- function(feature_names = NULL, filename_dump = NULL, model = NULL, n_first_tree = NULL, CSSstyle = NULL, width = NULL, height = NULL){
|
xgb.plot.tree <- function(feature_names = NULL, filename_dump = NULL, model = NULL, n_first_tree = NULL, CSSstyle = NULL, width = NULL, height = NULL){
|
||||||
|
|
||||||
if (!(class(CSSstyle) %in% c("character", "NULL") && length(CSSstyle) <= 1)) {
|
if (!(class(CSSstyle) %in% c("character", "NULL") && length(CSSstyle) <= 1)) {
|
||||||
stop("style: Has to be a character vector of size 1.")
|
stop("style: Has to be a character vector of size 1.")
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!class(model) %in% c("xgb.Booster", "NULL")) {
|
if (!class(model) %in% c("xgb.Booster", "NULL")) {
|
||||||
stop("model: Has to be an object of class xgb.Booster model generaged by the xgb.train function.")
|
stop("model: Has to be an object of class xgb.Booster model generaged by the xgb.train function.")
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!requireNamespace("DiagrammeR", quietly = TRUE)) {
|
if (!requireNamespace("DiagrammeR", quietly = TRUE)) {
|
||||||
stop("DiagrammeR package is required for xgb.plot.tree", call. = FALSE)
|
stop("DiagrammeR package is required for xgb.plot.tree", call. = FALSE)
|
||||||
}
|
}
|
||||||
|
|
||||||
if(is.null(model)){
|
if(is.null(model)){
|
||||||
allTrees <- xgb.model.dt.tree(feature_names = feature_names, filename_dump = filename_dump, n_first_tree = n_first_tree)
|
allTrees <- xgb.model.dt.tree(feature_names = feature_names, filename_dump = filename_dump, n_first_tree = n_first_tree)
|
||||||
} else {
|
} else {
|
||||||
allTrees <- xgb.model.dt.tree(feature_names = feature_names, model = model, n_first_tree = n_first_tree)
|
allTrees <- xgb.model.dt.tree(feature_names = feature_names, model = model, n_first_tree = n_first_tree)
|
||||||
}
|
}
|
||||||
|
|
||||||
allTrees[Feature!="Leaf" ,yesPath:= paste(ID,"(", Feature, "<br/>Cover: ", Cover, "<br/>Gain: ", Quality, ")-->|< ", Split, "|", Yes, ">", Yes.Feature, "]", sep = "")]
|
allTrees[Feature != "Leaf" ,yesPath := paste(ID,"(", Feature, "<br/>Cover: ", Cover, "<br/>Gain: ", Quality, ")-->|< ", Split, "|", Yes, ">", Yes.Feature, "]", sep = "")]
|
||||||
|
|
||||||
allTrees[Feature!="Leaf" ,noPath:= paste(ID,"(", Feature, ")-->|>= ", Split, "|", No, ">", No.Feature, "]", sep = "")]
|
allTrees[Feature != "Leaf" ,noPath := paste(ID,"(", Feature, ")-->|>= ", Split, "|", No, ">", No.Feature, "]", sep = "")]
|
||||||
|
|
||||||
|
|
||||||
if(is.null(CSSstyle)){
|
if(is.null(CSSstyle)){
|
||||||
CSSstyle <- "classDef greenNode fill:#A2EB86, stroke:#04C4AB, stroke-width:2px;classDef redNode fill:#FFA070, stroke:#FF5E5E, stroke-width:2px"
|
CSSstyle <- "classDef greenNode fill:#A2EB86, stroke:#04C4AB, stroke-width:2px;classDef redNode fill:#FFA070, stroke:#FF5E5E, stroke-width:2px"
|
||||||
}
|
}
|
||||||
|
|
||||||
yes <- allTrees[Feature!="Leaf", c(Yes)] %>% paste(collapse = ",") %>% paste("class ", ., " greenNode", sep = "")
|
yes <- allTrees[Feature != "Leaf", c(Yes)] %>% paste(collapse = ",") %>% paste("class ", ., " greenNode", sep = "")
|
||||||
|
|
||||||
no <- allTrees[Feature!="Leaf", c(No)] %>% paste(collapse = ",") %>% paste("class ", ., " redNode", sep = "")
|
no <- allTrees[Feature != "Leaf", c(No)] %>% paste(collapse = ",") %>% paste("class ", ., " redNode", sep = "")
|
||||||
|
|
||||||
path <- allTrees[Feature!="Leaf", c(yesPath, noPath)] %>% .[order(.)] %>% paste(sep = "", collapse = ";") %>% paste("graph LR", .,collapse = "", sep = ";") %>% paste(CSSstyle, yes, no, sep = ";")
|
path <- allTrees[Feature != "Leaf", c(yesPath, noPath)] %>% .[order(.)] %>% paste(sep = "", collapse = ";") %>% paste("graph LR", .,collapse = "", sep = ";") %>% paste(CSSstyle, yes, no, sep = ";")
|
||||||
DiagrammeR::mermaid(path, width, height)
|
DiagrammeR::mermaid(path, width, height)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -29,4 +29,4 @@ xgb.save <- function(model, fname) {
|
|||||||
stop("xgb.save: the input must be xgb.Booster. Use xgb.DMatrix.save to save
|
stop("xgb.save: the input must be xgb.Booster. Use xgb.DMatrix.save to save
|
||||||
xgb.DMatrix object.")
|
xgb.DMatrix object.")
|
||||||
return(FALSE)
|
return(FALSE)
|
||||||
}
|
}
|
||||||
|
|||||||
@ -120,9 +120,9 @@
|
|||||||
#' bst <- xgb.train(param, dtrain, nthread = 2, nround = 2, watchlist)
|
#' bst <- xgb.train(param, dtrain, nthread = 2, nround = 2, watchlist)
|
||||||
#' @export
|
#' @export
|
||||||
#'
|
#'
|
||||||
xgb.train <- function(params=list(), data, nrounds, watchlist = list(),
|
xgb.train <- function(params=list(), data, nrounds, watchlist = list(),
|
||||||
obj = NULL, feval = NULL, verbose = 1, print.every.n=1L,
|
obj = NULL, feval = NULL, verbose = 1, print.every.n=1L,
|
||||||
early.stop.round = NULL, maximize = NULL,
|
early.stop.round = NULL, maximize = NULL,
|
||||||
save_period = 0, save_name = "xgboost.model", ...) {
|
save_period = 0, save_name = "xgboost.model", ...) {
|
||||||
dtrain <- data
|
dtrain <- data
|
||||||
if (typeof(params) != "list") {
|
if (typeof(params) != "list") {
|
||||||
@ -139,14 +139,14 @@ xgb.train <- function(params=list(), data, nrounds, watchlist = list(),
|
|||||||
if (length(watchlist) != 0 && verbose == 0) {
|
if (length(watchlist) != 0 && verbose == 0) {
|
||||||
warning('watchlist is provided but verbose=0, no evaluation information will be printed')
|
warning('watchlist is provided but verbose=0, no evaluation information will be printed')
|
||||||
}
|
}
|
||||||
|
|
||||||
dot.params <- list(...)
|
dot.params <- list(...)
|
||||||
nms.params <- names(params)
|
nms.params <- names(params)
|
||||||
nms.dot.params <- names(dot.params)
|
nms.dot.params <- names(dot.params)
|
||||||
if (length(intersect(nms.params,nms.dot.params))>0)
|
if (length(intersect(nms.params,nms.dot.params)) > 0)
|
||||||
stop("Duplicated term in parameters. Please check your list of params.")
|
stop("Duplicated term in parameters. Please check your list of params.")
|
||||||
params <- append(params, dot.params)
|
params <- append(params, dot.params)
|
||||||
|
|
||||||
# customized objective and evaluation metric interface
|
# customized objective and evaluation metric interface
|
||||||
if (!is.null(params$objective) && !is.null(obj))
|
if (!is.null(params$objective) && !is.null(obj))
|
||||||
stop("xgb.train: cannot assign two different objectives")
|
stop("xgb.train: cannot assign two different objectives")
|
||||||
@ -162,7 +162,7 @@ xgb.train <- function(params=list(), data, nrounds, watchlist = list(),
|
|||||||
feval <- params$eval_metric
|
feval <- params$eval_metric
|
||||||
params$eval_metric <- NULL
|
params$eval_metric <- NULL
|
||||||
}
|
}
|
||||||
|
|
||||||
# Early stopping
|
# Early stopping
|
||||||
if (!is.null(early.stop.round)){
|
if (!is.null(early.stop.round)){
|
||||||
if (!is.null(feval) && is.null(maximize))
|
if (!is.null(feval) && is.null(maximize))
|
||||||
@ -179,25 +179,22 @@ xgb.train <- function(params=list(), data, nrounds, watchlist = list(),
|
|||||||
maximize <- TRUE
|
maximize <- TRUE
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (maximize) {
|
if (maximize) {
|
||||||
bestScore <- 0
|
bestScore <- 0
|
||||||
} else {
|
} else {
|
||||||
bestScore <- Inf
|
bestScore <- Inf
|
||||||
}
|
}
|
||||||
bestInd <- 0
|
bestInd <- 0
|
||||||
earlyStopflag <- FALSE
|
|
||||||
|
|
||||||
if (length(watchlist) > 1)
|
if (length(watchlist) > 1)
|
||||||
warning('Only the first data set in watchlist is used for early stopping process.')
|
warning('Only the first data set in watchlist is used for early stopping process.')
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
handle <- xgb.Booster(params, append(watchlist, dtrain))
|
handle <- xgb.Booster(params, append(watchlist, dtrain))
|
||||||
bst <- xgb.handleToBooster(handle)
|
bst <- xgb.handleToBooster(handle)
|
||||||
print.every.n <- max( as.integer(print.every.n), 1L)
|
print.every.n <- max( as.integer(print.every.n), 1L)
|
||||||
for (i in 1:nrounds) {
|
for (i in 1:nrounds) {
|
||||||
succ <- xgb.iter.update(bst$handle, dtrain, i - 1, obj)
|
|
||||||
if (length(watchlist) != 0) {
|
if (length(watchlist) != 0) {
|
||||||
msg <- xgb.iter.eval(bst$handle, watchlist, i - 1, feval)
|
msg <- xgb.iter.eval(bst$handle, watchlist, i - 1, feval)
|
||||||
if (0 == ( (i - 1) %% print.every.n))
|
if (0 == ( (i - 1) %% print.every.n))
|
||||||
@ -206,12 +203,11 @@ xgb.train <- function(params=list(), data, nrounds, watchlist = list(),
|
|||||||
{
|
{
|
||||||
score <- strsplit(msg,':|\\s+')[[1]][3]
|
score <- strsplit(msg,':|\\s+')[[1]][3]
|
||||||
score <- as.numeric(score)
|
score <- as.numeric(score)
|
||||||
if ((maximize && score > bestScore) || (!maximize && score < bestScore)) {
|
if ( (maximize && score > bestScore) || (!maximize && score < bestScore)) {
|
||||||
bestScore <- score
|
bestScore <- score
|
||||||
bestInd <- i
|
bestInd <- i
|
||||||
} else {
|
} else {
|
||||||
if (i - bestInd >= early.stop.round) {
|
if (i - bestInd >= early.stop.round) {
|
||||||
earlyStopflag <- TRUE
|
|
||||||
cat('Stopping. Best iteration:',bestInd)
|
cat('Stopping. Best iteration:',bestInd)
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
@ -230,4 +226,4 @@ xgb.train <- function(params=list(), data, nrounds, watchlist = list(),
|
|||||||
bst$bestInd <- bestInd
|
bst$bestInd <- bestInd
|
||||||
}
|
}
|
||||||
return(bst)
|
return(bst)
|
||||||
}
|
}
|
||||||
|
|||||||
@ -59,28 +59,26 @@
|
|||||||
#'
|
#'
|
||||||
#' @export
|
#' @export
|
||||||
#'
|
#'
|
||||||
xgboost <- function(data = NULL, label = NULL, missing = NA, weight = NULL,
|
xgboost <- function(data = NULL, label = NULL, missing = NA, weight = NULL,
|
||||||
params = list(), nrounds,
|
params = list(), nrounds,
|
||||||
verbose = 1, print.every.n = 1L, early.stop.round = NULL,
|
verbose = 1, print.every.n = 1L, early.stop.round = NULL,
|
||||||
maximize = NULL, save_period = 0, save_name = "xgboost.model", ...) {
|
maximize = NULL, save_period = 0, save_name = "xgboost.model", ...) {
|
||||||
dtrain <- xgb.get.DMatrix(data, label, missing, weight)
|
dtrain <- xgb.get.DMatrix(data, label, missing, weight)
|
||||||
|
|
||||||
params <- append(params, list(...))
|
params <- append(params, list(...))
|
||||||
|
|
||||||
if (verbose > 0) {
|
if (verbose > 0) {
|
||||||
watchlist <- list(train = dtrain)
|
watchlist <- list(train = dtrain)
|
||||||
} else {
|
} else {
|
||||||
watchlist <- list()
|
watchlist <- list()
|
||||||
}
|
}
|
||||||
|
|
||||||
bst <- xgb.train(params, dtrain, nrounds, watchlist, verbose = verbose, print.every.n=print.every.n,
|
bst <- xgb.train(params, dtrain, nrounds, watchlist, verbose = verbose, print.every.n=print.every.n,
|
||||||
early.stop.round = early.stop.round, maximize = maximize,
|
early.stop.round = early.stop.round, maximize = maximize,
|
||||||
save_period = save_period, save_name = save_name)
|
save_period = save_period, save_name = save_name)
|
||||||
|
|
||||||
return(bst)
|
return(bst)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
#' Training part from Mushroom Data Set
|
#' Training part from Mushroom Data Set
|
||||||
#'
|
#'
|
||||||
#' This data set is originally from the Mushroom data set,
|
#' This data set is originally from the Mushroom data set,
|
||||||
|
|||||||
@ -7,10 +7,10 @@ test_that("custom objective works", {
|
|||||||
data(agaricus.test, package='xgboost')
|
data(agaricus.test, package='xgboost')
|
||||||
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
|
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
|
||||||
dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)
|
dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)
|
||||||
|
|
||||||
watchlist <- list(eval = dtest, train = dtrain)
|
watchlist <- list(eval = dtest, train = dtrain)
|
||||||
num_round <- 2
|
num_round <- 2
|
||||||
|
|
||||||
logregobj <- function(preds, dtrain) {
|
logregobj <- function(preds, dtrain) {
|
||||||
labels <- getinfo(dtrain, "label")
|
labels <- getinfo(dtrain, "label")
|
||||||
preds <- 1 / (1 + exp(-preds))
|
preds <- 1 / (1 + exp(-preds))
|
||||||
@ -23,15 +23,15 @@ test_that("custom objective works", {
|
|||||||
err <- as.numeric(sum(labels != (preds > 0))) / length(labels)
|
err <- as.numeric(sum(labels != (preds > 0))) / length(labels)
|
||||||
return(list(metric = "error", value = err))
|
return(list(metric = "error", value = err))
|
||||||
}
|
}
|
||||||
|
|
||||||
param <- list(max.depth=2, eta=1, nthread = 2, silent=1,
|
param <- list(max.depth=2, eta=1, nthread = 2, silent=1,
|
||||||
objective=logregobj, eval_metric=evalerror)
|
objective=logregobj, eval_metric=evalerror)
|
||||||
|
|
||||||
bst <- xgb.train(param, dtrain, num_round, watchlist)
|
bst <- xgb.train(param, dtrain, num_round, watchlist)
|
||||||
expect_equal(class(bst), "xgb.Booster")
|
expect_equal(class(bst), "xgb.Booster")
|
||||||
expect_equal(length(bst$raw), 1064)
|
expect_equal(length(bst$raw), 1064)
|
||||||
attr(dtrain, 'label') <- getinfo(dtrain, 'label')
|
attr(dtrain, 'label') <- getinfo(dtrain, 'label')
|
||||||
|
|
||||||
logregobjattr <- function(preds, dtrain) {
|
logregobjattr <- function(preds, dtrain) {
|
||||||
labels <- attr(dtrain, 'label')
|
labels <- attr(dtrain, 'label')
|
||||||
preds <- 1 / (1 + exp(-preds))
|
preds <- 1 / (1 + exp(-preds))
|
||||||
@ -39,7 +39,7 @@ test_that("custom objective works", {
|
|||||||
hess <- preds * (1 - preds)
|
hess <- preds * (1 - preds)
|
||||||
return(list(grad = grad, hess = hess))
|
return(list(grad = grad, hess = hess))
|
||||||
}
|
}
|
||||||
param <- list(max.depth=2, eta=1, nthread = 2, silent = 1,
|
param <- list(max.depth=2, eta=1, nthread = 2, silent = 1,
|
||||||
objective = logregobjattr, eval_metric = evalerror)
|
objective = logregobjattr, eval_metric = evalerror)
|
||||||
bst <- xgb.train(param, dtrain, num_round, watchlist)
|
bst <- xgb.train(param, dtrain, num_round, watchlist)
|
||||||
expect_equal(class(bst), "xgb.Booster")
|
expect_equal(class(bst), "xgb.Booster")
|
||||||
|
|||||||
@ -23,5 +23,5 @@ test_that("Code Lint", {
|
|||||||
trailing_blank_lines_linter=lintr::trailing_blank_lines_linter,
|
trailing_blank_lines_linter=lintr::trailing_blank_lines_linter,
|
||||||
trailing_whitespace_linter=lintr::trailing_whitespace_linter
|
trailing_whitespace_linter=lintr::trailing_whitespace_linter
|
||||||
)
|
)
|
||||||
lintr::expect_lint_free(linters=my_linters) # uncomment this if you want to check code quality
|
# lintr::expect_lint_free(linters=my_linters) # uncomment this if you want to check code quality
|
||||||
})
|
})
|
||||||
|
|||||||
@ -9,5 +9,5 @@ test_that("poisson regression works", {
|
|||||||
expect_equal(class(bst), "xgb.Booster")
|
expect_equal(class(bst), "xgb.Booster")
|
||||||
pred <- predict(bst,as.matrix(mtcars[, -11]))
|
pred <- predict(bst,as.matrix(mtcars[, -11]))
|
||||||
expect_equal(length(pred), 32)
|
expect_equal(length(pred), 32)
|
||||||
sqrt(mean((pred - mtcars[,11]) ^ 2))
|
sqrt(mean( (pred - mtcars[,11]) ^ 2))
|
||||||
})
|
})
|
||||||
Loading…
x
Reference in New Issue
Block a user