Additional improvements for gblinear (#3134)
* fix rebase conflict
* [core] additional gblinear improvements
* [R] callback for gblinear coefficients history
* force eta=1 for gblinear python tests
* add top_k to GreedyFeatureSelector
* set eta=1 in shotgun test
* [core] fix SparsePage processing in gblinear; col-wise multithreading in greedy updater
* set sorted flag within TryInitColData
* gblinear tests: use scale, add external memory test
* fix multiclass for greedy updater
* fix whitespace
* fix typo
committed by GitHub
parent a1b48afa41
commit 706be4e5d4
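For orientation, here is a brief, hedged sketch of how the new pieces fit together in the R API. It uses the agaricus data shipped with the package; the updater, feature_selector and top_k parameters and the cb.gblinear.history() callback are the ones introduced below, while the specific values are illustrative only:

library(xgboost)
data(agaricus.train, package = 'xgboost')
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
# coordinate descent with the 'thrifty' selector, keeping only the top_k
# best features per boosting round
param <- list(booster = 'gblinear', objective = 'binary:logistic',
              updater = 'coord_descent', feature_selector = 'thrifty',
              top_k = 1, eta = 0.8, lambda = 1e-4, alpha = 1e-4)
bst <- xgb.train(param, dtrain, nrounds = 10,
                 callbacks = list(cb.gblinear.history()))
matplot(xgb.gblinear.history(bst), type = 'l')  # one line per coefficient path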
@@ -1,8 +1,8 @@
Package: xgboost
Type: Package
Title: Extreme Gradient Boosting
-Version: 0.7.0
-Date: 2018-01-22
+Version: 0.7.0.1
+Date: 2018-02-25
Author: Tianqi Chen <tianqi.tchen@gmail.com>, Tong He <hetong007@gmail.com>,
  Michael Benesty <michael@benesty.fr>, Vadim Khotilovich <khotilovich@gmail.com>,
  Yuan Tang <terrytangyuan@gmail.com>
@@ -18,6 +18,7 @@ export("xgb.parameters<-")
export(cb.cv.predict)
export(cb.early.stop)
export(cb.evaluation.log)
+export(cb.gblinear.history)
export(cb.print.evaluation)
export(cb.reset.parameters)
export(cb.save.model)
@@ -32,6 +33,7 @@ export(xgb.attributes)
export(xgb.create.features)
export(xgb.cv)
export(xgb.dump)
+export(xgb.gblinear.history)
export(xgb.ggplot.deepness)
export(xgb.ggplot.importance)
export(xgb.importance)
@@ -52,7 +54,9 @@ importClassesFrom(Matrix,dgeMatrix)
importFrom(Matrix,cBind)
importFrom(Matrix,colSums)
importFrom(Matrix,sparse.model.matrix)
+importFrom(Matrix,sparseMatrix)
+importFrom(Matrix,sparseVector)
importFrom(Matrix,t)
importFrom(data.table,":=")
importFrom(data.table,as.data.table)
importFrom(data.table,data.table)

@@ -524,6 +524,225 @@ cb.cv.predict <- function(save_models = FALSE) {
}

#' Callback closure for collecting the model coefficients history of a gblinear booster
#' during its training.
#'
#' @param sparse when set to FALSE/TRUE, a dense/sparse matrix is used to store the result.
#' Sparse format is useful when one expects only a subset of coefficients to be non-zero,
#' e.g., when using the "thrifty" feature selector with a fairly small number of top features
#' selected per iteration.
#'
#' @details
#' To keep things fast and simple, the gblinear booster does not internally store the history
#' of linear model coefficients at each boosting iteration. This callback provides a workaround
#' for storing the coefficients' path, by extracting them after each training iteration.
#'
#' The callback function expects the following values to be set in its calling frame:
#' \code{bst} (or \code{bst_folds}).
#'
#' @return
#' Results are stored in the \code{coefs} element of the closure.
#' The \code{\link{xgb.gblinear.history}} convenience function provides an easy way to access it.
#' With \code{xgb.train}, it is either a dense or a sparse matrix.
#' With \code{xgb.cv}, it is a list of such matrices (one element per CV fold).
#'
#' @seealso
#' \code{\link{callbacks}}, \code{\link{xgb.gblinear.history}}.
#'
#' @examples
#' #### Binary classification:
#' #
#' # In the iris dataset, it is hard to linearly separate the Versicolor class from the rest
#' # without considering the 2nd order interactions:
#' x <- model.matrix(Species ~ .^2, iris)[,-1]
#' colnames(x)
#' dtrain <- xgb.DMatrix(scale(x), label = 1*(iris$Species == "versicolor"))
#' param <- list(booster = "gblinear", objective = "reg:logistic", eval_metric = "auc",
#'               lambda = 0.0003, alpha = 0.0003, nthread = 2)
#' # For 'shotgun', which is the default linear updater, using high eta values may result in
#' # unstable behaviour in some datasets. With this simple dataset, however, the high learning
#' # rate does not break the convergence, but allows us to illustrate the typical pattern of
#' # "stochastic explosion" behaviour of this lock-free algorithm at early boosting iterations.
#' bst <- xgb.train(param, dtrain, list(tr=dtrain), nrounds = 200, eta = 1.,
#'                  callbacks = list(cb.gblinear.history()))
#' # Extract the coefficients' path and plot them vs boosting iteration number:
#' coef_path <- xgb.gblinear.history(bst)
#' matplot(coef_path, type = 'l')
#'
#' # With the deterministic coordinate descent updater, it is safer to use higher learning rates.
#' # Will try the classical componentwise boosting which selects a single best feature per round:
#' bst <- xgb.train(param, dtrain, list(tr=dtrain), nrounds = 200, eta = 0.8,
#'                  updater = 'coord_descent', feature_selector = 'thrifty', top_k = 1,
#'                  callbacks = list(cb.gblinear.history()))
#' xgb.gblinear.history(bst) %>% matplot(type = 'l')
#' # Componentwise boosting is known to have a similar effect to Lasso regularization.
#' # Try experimenting with various values of top_k, eta, nrounds,
#' # as well as different feature_selectors.
#'
#' # For xgb.cv:
#' bst <- xgb.cv(param, dtrain, nfold = 5, nrounds = 100, eta = 0.8,
#'               callbacks = list(cb.gblinear.history()))
#' # coefficients in the CV fold #3
#' xgb.gblinear.history(bst)[[3]] %>% matplot(type = 'l')
#'
#'
#' #### Multiclass classification:
#' #
#' dtrain <- xgb.DMatrix(scale(x), label = as.numeric(iris$Species) - 1)
#' param <- list(booster = "gblinear", objective = "multi:softprob", num_class = 3,
#'               lambda = 0.0003, alpha = 0.0003, nthread = 2)
#' # For the default linear updater, 'shotgun', it is sometimes helpful
#' # to use a smaller eta to reduce instability.
#' bst <- xgb.train(param, dtrain, list(tr=dtrain), nrounds = 70, eta = 0.5,
#'                  callbacks = list(cb.gblinear.history()))
#' # Will plot the coefficient paths separately for each class:
#' xgb.gblinear.history(bst, class_index = 0) %>% matplot(type = 'l')
#' xgb.gblinear.history(bst, class_index = 1) %>% matplot(type = 'l')
#' xgb.gblinear.history(bst, class_index = 2) %>% matplot(type = 'l')
#'
#' # CV:
#' bst <- xgb.cv(param, dtrain, nfold = 5, nrounds = 70, eta = 0.5,
#'               callbacks = list(cb.gblinear.history(FALSE)))
#' # 1st fold of the 1st class:
#' xgb.gblinear.history(bst, class_index = 0)[[1]] %>% matplot(type = 'l')
#'
#' @export
cb.gblinear.history <- function(sparse = FALSE) {
  coefs <- NULL

  init <- function(env) {
    # assign into the closure variable so that state persists between calls
    if (!is.null(env$bst)) { # xgb.train:
      coefs <<- list()
    } else if (!is.null(env$bst_folds)) { # xgb.cv:
      coefs <<- list()
    } else stop("Parent frame has neither 'bst' nor 'bst_folds'")
  }

  # convert from list to (sparse) matrix
  list2mat <- function(coef_list) {
    if (sparse) {
      coef_mat <- sparseMatrix(x = unlist(lapply(coef_list, slot, "x")),
                               i = unlist(lapply(coef_list, slot, "i")),
                               p = c(0, cumsum(sapply(coef_list, function(x) length(x@x)))),
                               dims = c(length(coef_list[[1]]), length(coef_list)))
      return(t(coef_mat))
    } else {
      return(do.call(rbind, coef_list))
    }
  }

  finalizer <- function(env) {
    if (length(coefs) == 0)
      return()
    if (!is.null(env$bst)) { # xgb.train:
      coefs <<- list2mat(coefs)
    } else { # xgb.cv:
      # first lapply transposes the list
      coefs <<- lapply(seq_along(coefs[[1]]), function(i) lapply(coefs, "[[", i)) %>%
        lapply(function(x) list2mat(x))
    }
  }

  extract.coef <- function(env) {
    if (!is.null(env$bst)) { # xgb.train:
      cf <- as.numeric(grep('(booster|bias|weigh)', xgb.dump(env$bst), invert = TRUE, value = TRUE))
      if (sparse) cf <- as(cf, "sparseVector")
    } else { # xgb.cv:
      cf <- vector("list", length(env$bst_folds))
      for (i in seq_along(env$bst_folds)) {
        dmp <- xgb.dump(xgb.handleToBooster(env$bst_folds[[i]]$bst))
        cf[[i]] <- as.numeric(grep('(booster|bias|weigh)', dmp, invert = TRUE, value = TRUE))
        if (sparse) cf[[i]] <- as(cf[[i]], "sparseVector")
      }
    }
    cf
  }

  callback <- function(env = parent.frame(), finalize = FALSE) {
    if (is.null(coefs)) init(env)
    if (finalize) return(finalizer(env))
    cf <- extract.coef(env)
    coefs <<- c(coefs, list(cf))
  }

  attr(callback, 'call') <- match.call()
  attr(callback, 'name') <- 'cb.gblinear.history'
  callback
}
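For readers unfamiliar with this callback style, here is a minimal, self-contained sketch of the closure pattern used above (not part of the commit; make_history_callback is a made-up name): state accumulates across calls via <<-, and a finalize call converts the accumulated list into a matrix.

make_history_callback <- function() {
  history <- NULL
  callback <- function(value = NULL, finalize = FALSE) {
    if (is.null(history)) history <<- list()
    if (finalize) {
      # same idea as the finalizer above: list of per-round vectors -> matrix
      history <<- do.call(rbind, history)
      return(invisible(history))
    }
    history <<- c(history, list(value))
  }
  callback
}
cb <- make_history_callback()
for (i in 1:3) cb(c(bias = i, w1 = i^2))  # record one "coefficient" vector per round
cb(finalize = TRUE)
environment(cb)$history  # a 3 x 2 matrix holding the recorded path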

#' Extract gblinear coefficients history.
#'
#' A helper function to extract the matrix of linear coefficients' history
#' from a gblinear model created while using the \code{cb.gblinear.history()}
#' callback.
#'
#' @param model either an \code{xgb.Booster} or a result of \code{xgb.cv()}, trained
#' using the \code{cb.gblinear.history()} callback.
#' @param class_index zero-based class index used to extract the coefficients of only
#' that specific class in a multiclass model. When it is NULL, all the
#' coefficients are returned. Has no effect on non-multiclass models.
#'
#' @return
#' For an \code{xgb.train} result, a matrix (either dense or sparse) with the columns
#' corresponding to the coefficients (in the same order as \code{xgb.dump()} would
#' return them) and the rows corresponding to boosting iterations.
#'
#' For an \code{xgb.cv} result, a list of such matrices is returned with the elements
#' corresponding to CV folds.
#'
#' @examples
#' # See the examples in \code{\link{cb.gblinear.history}}.
#'
#' @export
xgb.gblinear.history <- function(model, class_index = NULL) {

  if (!(inherits(model, "xgb.Booster") ||
        inherits(model, "xgb.cv.synchronous")))
    stop("model must be an object of either xgb.Booster or xgb.cv.synchronous class")
  is_cv <- inherits(model, "xgb.cv.synchronous")

  if (is.null(model[["callbacks"]]) || is.null(model$callbacks[["cb.gblinear.history"]]))
    stop("model must be trained while using the cb.gblinear.history() callback")

  if (!is_cv) {
    # extract num_class & num_feat from the internal model
    dmp <- xgb.dump(model)
    if (length(dmp) < 2 || dmp[2] != "bias:")
      stop("It does not appear to be a gblinear model")
    dmp <- dmp[-c(1, 2)]
    n <- which(dmp == 'weight:')
    if (length(n) != 1)
      stop("It does not appear to be a gblinear model")
    num_class <- n - 1
    num_feat <- (length(dmp) - 4) / num_class
  } else {
    # in case of CV, the object is expected to have this info
    if (model$params$booster != "gblinear")
      stop("It does not appear to be a gblinear model")
    num_class <- NVL(model$params$num_class, 1)
    num_feat <- model$nfeatures
    if (is.null(num_feat))
      stop("This xgb.cv result does not have nfeatures info")
  }

  if (!is.null(class_index) &&
      num_class > 1 &&
      (class_index[1] < 0 || class_index[1] >= num_class))
    stop("class_index has to be within [0,", num_class - 1, "]")

  coef_path <- environment(model$callbacks$cb.gblinear.history)[["coefs"]]
  if (!is.null(class_index) && num_class > 1) {
    coef_path <- if (is.list(coef_path)) {
      lapply(coef_path,
             function(x) x[, seq(1 + class_index, by = num_class, length.out = num_feat)])
    } else {
      coef_path[, seq(1 + class_index, by = num_class, length.out = num_feat)]
    }
  }
  coef_path
}
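To make the seq(1 + class_index, by = num_class, length.out = num_feat) indexing above concrete: in xgb.dump() order, the per-feature coefficients of a multiclass gblinear model are interleaved by class, so class k occupies every num_class-th column. A small illustration with made-up labels:

num_class <- 3; num_feat <- 4
# column labels in dump order: f0_c0, f0_c1, f0_c2, f1_c0, ...
labels <- paste0('f', rep(seq_len(num_feat) - 1, each = num_class),
                 '_c', rep(seq_len(num_class) - 1, num_feat))
class_index <- 1
labels[seq(1 + class_index, by = num_class, length.out = num_feat)]
# "f0_c1" "f1_c1" "f2_c1" "f3_c1"  -- the columns belonging to class 1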

#
# Internal utility functions for callbacks ------------------------------------
#

@@ -88,6 +88,7 @@
#' CV-based evaluation means and standard deviations for the training and test CV-sets.
#' It is created by the \code{\link{cb.evaluation.log}} callback.
#' \item \code{niter} number of boosting iterations.
+#' \item \code{nfeatures} number of features in training data.
#' \item \code{folds} the list of CV folds' indices - either those passed through the \code{folds}
#' parameter or randomly generated.
#' \item \code{best_iteration} iteration number with the best evaluation metric value

@@ -184,6 +185,7 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing =
    handle <- xgb.Booster.handle(params, list(dtrain, dtest))
    list(dtrain = dtrain, bst = handle, watchlist = list(train = dtrain, test = dtest), index = folds[[k]])
  })
+  rm(dall)
  # a "basket" to collect some results from callbacks
  basket <- list()

@@ -221,6 +223,7 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing =
    callbacks = callbacks,
    evaluation_log = evaluation_log,
    niter = end_iteration,
+   nfeatures = ncol(data),
    folds = folds
  )
  ret <- c(ret, basket)

@@ -162,6 +162,7 @@
#' (only available with early stopping).
#' \item \code{feature_names} names of the training dataset features
#' (only when column names were defined in training data).
+#' \item \code{nfeatures} number of features in training data.
#' }
#'
#' @seealso

@@ -363,6 +364,7 @@ xgb.train <- function(params = list(), data, nrounds, watchlist = list(),
  bst$callbacks <- callbacks
  if (!is.null(colnames(dtrain)))
    bst$feature_names <- colnames(dtrain)
+  bst$nfeatures <- ncol(dtrain)

  return(bst)
}
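A quick, hedged illustration of the field added above (agaricus data from the package; parameter values are arbitrary): after this change, both xgb.train and xgb.cv results carry nfeatures.

library(xgboost)
data(agaricus.train, package = 'xgboost')
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
bst <- xgb.train(list(booster = 'gblinear', objective = 'binary:logistic'),
                 dtrain, nrounds = 2)
bst$nfeatures  # equals ncol(dtrain)
cv <- xgb.cv(list(objective = 'binary:logistic'), dtrain, nrounds = 2, nfold = 2)
cv$nfeatures   # the same field on an xgb.cv result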
@@ -81,6 +81,8 @@ NULL
#' @importFrom Matrix colSums
#' @importFrom Matrix sparse.model.matrix
+#' @importFrom Matrix sparseVector
+#' @importFrom Matrix sparseMatrix
#' @importFrom Matrix t
#' @importFrom data.table data.table
#' @importFrom data.table is.data.table
#' @importFrom data.table as.data.table

@@ -104,6 +104,7 @@ An object of class \code{xgb.cv.synchronous} with the following elements:
  CV-based evaluation means and standard deviations for the training and test CV-sets.
  It is created by the \code{\link{cb.evaluation.log}} callback.
  \item \code{niter} number of boosting iterations.
+ \item \code{nfeatures} number of features in training data.
  \item \code{folds} the list of CV folds' indices - either those passed through the \code{folds}
  parameter or randomly generated.
  \item \code{best_iteration} iteration number with the best evaluation metric value

@@ -155,6 +155,7 @@ An object of class \code{xgb.Booster} with the following elements:
  (only available with early stopping).
  \item \code{feature_names} names of the training dataset features
  (only when column names were defined in training data).
+ \item \code{nfeatures} number of features in training data.
  }
}
\description{

@@ -2,18 +2,47 @@ context('Test generalized linear models')

require(xgboost)

-test_that("glm works", {
+test_that("gblinear works", {
  data(agaricus.train, package='xgboost')
  data(agaricus.test, package='xgboost')
  dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
  dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)
  expect_equal(class(dtrain), "xgb.DMatrix")
  expect_equal(class(dtest), "xgb.DMatrix")

  param <- list(objective = "binary:logistic", booster = "gblinear",
-               nthread = 2, alpha = 0.0001, lambda = 1)
+               nthread = 2, eta = 0.8, alpha = 0.0001, lambda = 0.0001)
  watchlist <- list(eval = dtest, train = dtrain)
-  num_round <- 2
-  bst <- xgb.train(param, dtrain, num_round, watchlist)

  n <- 5 # iterations
  ERR_UL <- 0.005 # upper limit for the test set error
  VERB <- 0 # chatterbox switch

  param$updater = 'shotgun'
  bst <- xgb.train(param, dtrain, n, watchlist, verbose = VERB, feature_selector = 'shuffle')
  ypred <- predict(bst, dtest)
  expect_equal(length(getinfo(dtest, 'label')), 1611)
  expect_lt(bst$evaluation_log$eval_error[n], ERR_UL)

  bst <- xgb.train(param, dtrain, n, watchlist, verbose = VERB, feature_selector = 'cyclic',
                   callbacks = list(cb.gblinear.history()))
  expect_lt(bst$evaluation_log$eval_error[n], ERR_UL)
  h <- xgb.gblinear.history(bst)
  expect_equal(dim(h), c(n, ncol(dtrain) + 1))
  expect_is(h, "matrix")

  param$updater = 'coord_descent'
  bst <- xgb.train(param, dtrain, n, watchlist, verbose = VERB, feature_selector = 'cyclic')
  expect_lt(bst$evaluation_log$eval_error[n], ERR_UL)

  bst <- xgb.train(param, dtrain, n, watchlist, verbose = VERB, feature_selector = 'shuffle')
  expect_lt(bst$evaluation_log$eval_error[n], ERR_UL)

  bst <- xgb.train(param, dtrain, 2, watchlist, verbose = VERB, feature_selector = 'greedy')
  expect_lt(bst$evaluation_log$eval_error[2], ERR_UL)

  bst <- xgb.train(param, dtrain, n, watchlist, verbose = VERB, feature_selector = 'thrifty',
                   top_k = 50, callbacks = list(cb.gblinear.history(sparse = TRUE)))
  expect_lt(bst$evaluation_log$eval_error[n], ERR_UL)
  h <- xgb.gblinear.history(bst)
  expect_equal(dim(h), c(n, ncol(dtrain) + 1))
  expect_s4_class(h, "dgCMatrix")
})
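For completeness, one plausible way to run just this test file locally (the path is assumed from the repository layout; adjust to your checkout):

library(testthat)
library(xgboost)
test_file('R-package/tests/testthat/test_glm.R')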