SHAP values for feature contributions (#2438)

* SHAP values for feature contributions

* Fix commenting error

* New polynomial time SHAP value estimation algorithm

* Update API to support SHAP values

* Fix merge conflicts with updates in master

* Correct submodule hashes

* Fix variable sized stack allocation

* Make lint happy

* Add docs

* Fix typo

* Adjust tolerances

* Remove unneeded def

* Fixed cpp test setup

* Updated R API and cleaned up

* Fixed test typo
Author: Scott Lundberg
Committed by: GitHub
Date: 2017-10-12 12:35:51 -07:00
Parent: ff9180cd73
Commit: 78c4188cec
16 changed files with 369 additions and 143 deletions
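For orientation, here is a minimal sketch of the new interface in R, pieced together from the documentation and tests changed below; the training parameters are illustrative, not mandated by this commit:

library(xgboost)
data(agaricus.train, package = 'xgboost')
data(agaricus.test, package = 'xgboost')
# any binary:logistic booster will do; these settings mirror the doc examples
bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label,
               max_depth = 2, eta = 1, nthread = 2, nrounds = 2,
               objective = "binary:logistic")
# exact SHAP contributions: an nsamples x (nfeatures + 1) matrix, last column is BIAS
contr <- predict(bst, agaricus.test$data, predcontrib = TRUE)
# faster approximation via the new approxcontrib flag
contr_approx <- predict(bst, agaricus.test$data, predcontrib = TRUE, approxcontrib = TRUE)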


@@ -39,7 +39,7 @@ xgb.handleToBooster <- function(handle, raw = NULL) {
is.null.handle <- function(handle) {
if (!identical(class(handle), "xgb.Booster.handle"))
stop("argument type must be xgb.Booster.handle")
if (is.null(handle) || .Call(XGCheckNullPtr_R, handle))
return(TRUE)
return(FALSE)
@@ -61,49 +61,49 @@ xgb.get.handle <- function(object) {
#' Restore missing parts of an incomplete xgb.Booster object.
#'
#' It attempts to complete an \code{xgb.Booster} object by restoring either its missing
#' raw model memory dump (when it has no \code{raw} data but its \code{xgb.Booster.handle} is valid)
#' or its missing internal handle (when its \code{xgb.Booster.handle} is not valid
#' but it has a raw Booster memory dump).
#'
#' @param object object of class \code{xgb.Booster}
#' @param saveraw a flag indicating whether to append \code{raw} Booster memory dump data
#' when it doesn't already exist.
#'
#' @details
#'
#' While this method is primarily for internal use, it might be useful in some practical situations.
#'
#' E.g., when an \code{xgb.Booster} model is saved as an R object and then is loaded as an R object,
#' its handle (pointer) to an internal xgboost model would be invalid. The majority of xgboost methods
#' should still work for such a model object since those methods would be using
#' \code{xgb.Booster.complete} internally. However, one might find it to be more efficient to call the
#' \code{xgb.Booster.complete} function explicitly once after loading a model as an R object.
#' That would prevent further repeated implicit reconstruction of an internal booster model.
#'
#' @return
#' An object of \code{xgb.Booster} class.
#'
#' @examples
#'
#' data(agaricus.train, package='xgboost')
#' bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max_depth = 2,
#' eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic")
#' saveRDS(bst, "xgb.model.rds")
#'
#' bst1 <- readRDS("xgb.model.rds")
#' # the handle is invalid:
#' print(bst1$handle)
#'
#' bst1 <- xgb.Booster.complete(bst1)
#' # now the handle points to a valid internal booster model:
#' print(bst1$handle)
#'
#' @export
xgb.Booster.complete <- function(object, saveraw = TRUE) {
if (!inherits(object, "xgb.Booster"))
stop("argument type must be xgb.Booster")
if (is.null.handle(object$handle)) {
object$handle <- xgb.Booster.handle(modelfile = object$raw)
} else {
@@ -114,88 +114,90 @@ xgb.Booster.complete <- function(object, saveraw = TRUE) {
}
#' Predict method for eXtreme Gradient Boosting model
#'
#' Predicted values based on either xgboost model or model handle object.
#'
#' @param object Object of class \code{xgb.Booster} or \code{xgb.Booster.handle}
#' @param newdata takes \code{matrix}, \code{dgCMatrix}, local data file or \code{xgb.DMatrix}.
#' @param missing only used when input is a dense matrix. Pick a float value that represents
#' missing values in data (e.g., sometimes 0 or some other extreme value is used).
#' @param outputmargin whether the prediction should be returned in the form of original untransformed
#' sum of predictions from boosting iterations' results. E.g., setting \code{outputmargin=TRUE} for
#' logistic regression would result in predictions for log-odds instead of probabilities.
#' @param ntreelimit limit the number of model's trees or boosting iterations used in prediction (see Details).
#' It will use all the trees by default (\code{NULL} value).
#' @param predleaf whether to predict leaf index instead.
#' @param predcontrib whether to return feature contributions to individual predictions instead (see Details).
#' @param approxcontrib whether to use a fast approximation for feature contributions (see Details).
#' @param reshape whether to reshape the vector of predictions to a matrix form when there are several
#' prediction outputs per case. This option has no effect when \code{predleaf = TRUE}.
#' @param ... Parameters passed to \code{predict.xgb.Booster}
#'
#' @details
#' Note that \code{ntreelimit} is not necessarily equal to the number of boosting iterations
#' and it is not necessarily equal to the number of trees in a model.
#' E.g., in a random forest-like model, \code{ntreelimit} would limit the number of trees.
#' But for multiclass classification, while there are multiple trees per iteration,
#' \code{ntreelimit} limits the number of boosting iterations.
#'
#' Also note that \code{ntreelimit} would currently do nothing for predictions from gblinear,
#' since gblinear doesn't keep its boosting history.
#'
#' One possible practical application of the \code{predleaf} option is to use the model
#' as a generator of new features which capture non-linearity and interactions,
#' e.g., as implemented in \code{\link{xgb.create.features}}.
#'
#' Setting \code{predcontrib = TRUE} allows calculating the contributions of each feature to
#' individual predictions. For "gblinear" booster, feature contributions are simply linear terms
#' (feature_beta * feature_value). For "gbtree" booster, feature contributions are SHAP
#' values (\url{https://arxiv.org/abs/1706.06060}) that sum to the difference between the expected output
#' of the model and the current prediction (where the Hessian weights are used to compute the expectations).
#' Setting \code{approxcontrib = TRUE} approximates these values following the idea explained
#' in \url{http://blog.datadive.net/interpreting-random-forests/}.
#'
#' @return
#' For regression or binary classification, it returns a vector of length \code{nrows(newdata)}.
#' For multiclass classification, either a \code{num_class * nrows(newdata)} vector or
#' a \code{(nrows(newdata), num_class)} dimension matrix is returned, depending on
#' the \code{reshape} value.
#'
#' When \code{predleaf = TRUE}, the output is a matrix object with the
#' number of columns corresponding to the number of trees.
#'
#' When \code{predcontrib = TRUE} and it is not a multiclass setting, the output is a matrix object with
#' \code{num_features + 1} columns, where the final column corresponds to the bias.
#' For a multiclass case, a list of \code{num_class} elements is returned, where each element is
#' such a matrix. The contribution values are on the scale of untransformed margin
#' (e.g., for binary classification would mean that the contributions are log-odds deviations from bias).
#'
#' @seealso
#' \code{\link{xgb.train}}.
#'
#' @examples
#' ## binary classification:
#'
#' data(agaricus.train, package='xgboost')
#' data(agaricus.test, package='xgboost')
#' train <- agaricus.train
#' test <- agaricus.test
#'
#' bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
#' eta = 0.5, nthread = 2, nrounds = 5, objective = "binary:logistic")
#' # use all trees by default
#' pred <- predict(bst, test$data)
#' # use only the 1st tree
#' pred1 <- predict(bst, test$data, ntreelimit = 1)
#'
#' # Predicting tree leaves:
#' # the result is an nsamples X ntrees matrix
#' pred_leaf <- predict(bst, test$data, predleaf = TRUE)
#' str(pred_leaf)
#'
#' # Predicting feature contributions to predictions:
#' # the result is an nsamples X (nfeatures + 1) matrix
#' pred_contr <- predict(bst, test$data, predcontrib = TRUE)
#' str(pred_contr)
#' # verify that contributions' sums are equal to log-odds of predictions (up to float precision):
#' summary(rowSums(pred_contr) - qlogis(pred))
#' # for the 1st record, let's inspect its features that had non-zero contribution to prediction:
#' contr1 <- pred_contr[1,]
@@ -206,10 +208,10 @@ xgb.Booster.complete <- function(object, saveraw = TRUE) {
#' par(mar = old_mar + c(0,7,0,0))
#' barplot(contr1, horiz = TRUE, las = 2, xlab = "contribution to prediction in log-odds")
#' par(mar = old_mar)
#'
#'
#' ## multiclass classification in iris dataset:
#'
#' lb <- as.numeric(iris$Species) - 1
#' num_class <- 3
#' set.seed(11)
@@ -225,7 +227,7 @@ xgb.Booster.complete <- function(object, saveraw = TRUE) {
#' pred_labels <- max.col(pred) - 1
#' # the following should result in the same error as seen in the last iteration
#' sum(pred_labels != lb)/length(lb)
#'
#' # compare that to the predictions from softmax:
#' set.seed(11)
#' bst <- xgboost(data = as.matrix(iris[, -5]), label = lb,
@@ -234,14 +236,14 @@ xgb.Booster.complete <- function(object, saveraw = TRUE) {
#' pred <- predict(bst, as.matrix(iris[, -5]))
#' str(pred)
#' all.equal(pred, pred_labels)
#' # prediction from using only 5 iterations should result
#' # in the same error as seen in iteration 5:
#' pred5 <- predict(bst, as.matrix(iris[, -5]), ntreelimit=5)
#' sum(pred5 != lb)/length(lb)
#'
#'
#' ## random forest-like model of 25 trees for binary classification:
#'
#' set.seed(11)
#' bst <- xgboost(data = train$data, label = train$label, max_depth = 5,
#' nthread = 2, nrounds = 1, objective = "binary:logistic",
@@ -258,7 +260,7 @@ xgb.Booster.complete <- function(object, saveraw = TRUE) {
#' @rdname predict.xgb.Booster
#' @export
predict.xgb.Booster <- function(object, newdata, missing = NA, outputmargin = FALSE, ntreelimit = NULL,
predleaf = FALSE, predcontrib = FALSE, approxcontrib = FALSE, reshape = FALSE, ...) {
object <- xgb.Booster.complete(object, saveraw = FALSE)
if (!inherits(newdata, "xgb.DMatrix"))
@@ -269,18 +271,18 @@ predict.xgb.Booster <- function(object, newdata, missing = NA, outputmargin = FA
ntreelimit <- 0
if (ntreelimit < 0)
stop("ntreelimit cannot be negative")
option <- 0L + 1L * as.logical(outputmargin) + 2L * as.logical(predleaf) + 4L * as.logical(predcontrib) + 8L * as.logical(approxcontrib)
ret <- .Call(XGBoosterPredict_R, object$handle, newdata, option[1], as.integer(ntreelimit))
n_ret <- length(ret)
n_row <- nrow(newdata)
npred_per_case <- n_ret / n_row
if (n_ret %% n_row != 0)
stop("prediction length ", n_ret, " is not multiple of nrows(newdata) ", n_row)
if (predleaf) {
ret <- if (n_ret == n_row) {
matrix(ret, ncol = 1)
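As an aside, the option value above packs the prediction flags into a bit field before the .Call into the native backend, with the new approxcontrib flag taking bit 3. A purely illustrative sketch, not part of the diff:

# bit 0: outputmargin, bit 1: predleaf, bit 2: predcontrib, bit 3: approxcontrib
option <- 0L + 1L * TRUE + 2L * FALSE + 4L * TRUE + 8L * FALSE
option                     # 5L: margin output combined with exact SHAP contributions
bitwAnd(option, 4L) != 0L  # TRUE -> the backend computes feature contributions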
@@ -325,9 +327,9 @@ predict.xgb.Booster.handle <- function(object, ...) {
#'
#' @param object Object of class \code{xgb.Booster} or \code{xgb.Booster.handle}.
#' @param name a non-empty character string specifying which attribute is to be accessed.
#' @param value a value of an attribute for \code{xgb.attr<-}; for \code{xgb.attributes<-}
#' it's a list (or an object coercible to a list) with the names of attributes to set
#' and the elements corresponding to attribute values.
#' Non-character values are converted to character.
#' When attribute value is not a scalar, only the first index is used.
#' Use \code{NULL} to remove an attribute.
@@ -336,32 +338,32 @@ predict.xgb.Booster.handle <- function(object, ...) {
#' The primary purpose of xgboost model attributes is to store some meta-data about the model.
#' Note that they are a separate concept from the object attributes in R.
#' Specifically, they refer to key-value strings that can be attached to an xgboost model,
#' stored together with the model's binary representation, and accessed later
#' (from R or any other interface).
#' In contrast, any R-attribute assigned to an R-object of \code{xgb.Booster} class
#' would not be saved by \code{xgb.save} because an xgboost model is an external memory object
#' and its serialization is handled externally.
#' Also, setting an attribute that has the same name as one of xgboost's parameters wouldn't
#' change the value of that parameter for a model.
#' Use \code{\link{xgb.parameters<-}} to set or change model parameters.
#'
#' The attribute setters would usually work more efficiently for \code{xgb.Booster.handle}
#' than for \code{xgb.Booster}, since only a handle (pointer) would need to be copied.
#' That would only matter if attributes need to be set many times.
#' Note, however, that when feeding a handle of an \code{xgb.Booster} object to the attribute setters,
#' the raw model cache of an \code{xgb.Booster} object would not be automatically updated,
#' and it would be the user's responsibility to call \code{xgb.save.raw} to update it.
#'
#' The \code{xgb.attributes<-} setter either updates existing attributes or adds one or several new ones,
#' but it doesn't delete the other existing attributes.
#'
#' @return
#' \code{xgb.attr} returns either a string value of an attribute
#' or \code{NULL} if an attribute wasn't stored in a model.
#'
#' \code{xgb.attributes} returns a list of all attributes stored in a model
#' or \code{NULL} if a model has no stored attributes.
#'
#' @examples
#' data(agaricus.train, package='xgboost')
#' train <- agaricus.train
@@ -377,13 +379,13 @@ predict.xgb.Booster.handle <- function(object, ...) {
#' bst1 <- xgb.load('xgb.model')
#' print(xgb.attr(bst1, "my_attribute"))
#' print(xgb.attributes(bst1))
#'
#' # deletion:
#' xgb.attr(bst1, "my_attribute") <- NULL
#' print(xgb.attributes(bst1))
#' xgb.attributes(bst1) <- list(a = NULL, b = NULL)
#' print(xgb.attributes(bst1))
#'
#' @rdname xgb.attr
#' @export
xgb.attr <- function(object, name) {
@@ -464,7 +466,7 @@ xgb.attributes <- function(object) {
#' @details
#' Note that the setter would usually work more efficiently for \code{xgb.Booster.handle}
#' than for \code{xgb.Booster}, since only a handle would need to be copied.
#'
#' @examples
#' data(agaricus.train, package='xgboost')
#' train <- agaricus.train
@@ -473,7 +475,7 @@ xgb.attributes <- function(object) {
#' eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic")
#'
#' xgb.parameters(bst) <- list(eta = 0.1)
#'
#' @rdname xgb.parameters
#' @export
`xgb.parameters<-` <- function(object, value) {
@@ -503,28 +505,28 @@ xgb.ntree <- function(bst) {
#' Print xgb.Booster
#'
#' Print information about xgb.Booster.
#'
#' @param x an xgb.Booster object
#' @param verbose whether to print detailed data (e.g., attribute values)
#' @param ... not currently used
#'
#' @examples
#' data(agaricus.train, package='xgboost')
#' train <- agaricus.train
#' bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
#' eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic")
#' attr(bst, 'myattr') <- 'memo'
#'
#' print(bst)
#' print(bst, verbose=TRUE)
#'
#' @method print xgb.Booster
#' @export
print.xgb.Booster <- function(x, verbose = FALSE, ...) {
cat('##### xgb.Booster\n')
valid_handle <- !is.null.handle(x$handle)
if (!valid_handle)
cat("Handle is invalid! Suggest using xgb.Booster.complete\n")
@@ -539,10 +541,10 @@ print.xgb.Booster <- function(x, verbose = FALSE, ...) {
cat('call:\n ')
print(x$call)
}
if (!is.null(x$params)) {
cat('params (as set within xgb.train):\n')
cat( ' ',
paste(names(x$params),
paste0('"', unlist(x$params), '"'),
sep = ' = ', collapse = ', '), '\n', sep = '')
@@ -562,7 +564,7 @@ print.xgb.Booster <- function(x, verbose = FALSE, ...) {
cat(' ', paste(names(attrs), collapse = ', '), '\n', sep = '')
}
}
if (!is.null(x$callbacks) && length(x$callbacks) > 0) {
cat('callbacks:\n')
lapply(callback.calls(x$callbacks), function(x) {
@@ -570,14 +572,14 @@ print.xgb.Booster <- function(x, verbose = FALSE, ...) {
print(x)
})
}
if (!is.null(x$feature_names))
cat('# of features:', length(x$feature_names), '\n')
cat('niter: ', x$niter, '\n', sep = '')
# TODO: uncomment when faster xgb.ntree is implemented
#cat('ntree: ', xgb.ntree(x), '\n', sep='')
for (n in setdiff(names(x), c('handle', 'raw', 'call', 'params', 'callbacks',
'evaluation_log','niter','feature_names'))) {
if (is.atomic(x[[n]])) {
@@ -587,11 +589,11 @@ print.xgb.Booster <- function(x, verbose = FALSE, ...) {
print(x[[n]])
}
}
if (!is.null(x$evaluation_log)) {
cat('evaluation_log:\n')
print(x$evaluation_log, row.names = FALSE, topn = 2)
}
invisible(x)
}
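The additivity property promised in the documentation above, namely that the contributions, including the BIAS column, sum to the untransformed margin prediction, can be verified directly. A short sketch, assuming the agaricus booster bst from the examples earlier:

pred_margin <- predict(bst, agaricus.test$data, outputmargin = TRUE)
pred_contr  <- predict(bst, agaricus.test$data, predcontrib = TRUE)
# SHAP "local accuracy": row sums match the margin up to float precision
max(abs(rowSums(pred_contr) - pred_margin))  # expected to be below ~1e-5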


@@ -52,9 +52,9 @@ test_that("xgb.dump works", {
test_that("xgb.dump works for gblinear", {
expect_length(xgb.dump(bst.GLM), 14)
# also make sure that it works properly for a sparse model where some coefficients
# are 0 from setting large L1 regularization:
bst.GLM.sp <- xgboost(data = sparse_matrix, label = label, eta = 1, nthread = 2, nrounds = 1,
alpha=2, objective = "binary:logistic", booster = "gblinear")
d.sp <- xgb.dump(bst.GLM.sp)
expect_length(d.sp, 14)
@@ -80,28 +80,35 @@ test_that("predict feature contributions works", {
expect_equal(dim(pred_contr), c(nrow(sparse_matrix), ncol(sparse_matrix) + 1))
expect_equal(colnames(pred_contr), c(colnames(sparse_matrix), "BIAS"))
pred <- predict(bst.Tree, sparse_matrix, outputmargin = TRUE)
expect_lt(max(abs(rowSums(pred_contr) - pred)), 1e-5)
# gbtree binary classifier (approximate method)
expect_error(pred_contr <- predict(bst.Tree, sparse_matrix, predcontrib = TRUE, approxcontrib = TRUE), regexp = NA)
expect_equal(dim(pred_contr), c(nrow(sparse_matrix), ncol(sparse_matrix) + 1))
expect_equal(colnames(pred_contr), c(colnames(sparse_matrix), "BIAS"))
pred <- predict(bst.Tree, sparse_matrix, outputmargin = TRUE)
expect_lt(max(abs(rowSums(pred_contr) - pred)), 1e-5)
# gblinear binary classifier
expect_error(pred_contr <- predict(bst.GLM, sparse_matrix, predcontrib = TRUE), regexp = NA)
expect_equal(dim(pred_contr), c(nrow(sparse_matrix), ncol(sparse_matrix) + 1))
expect_equal(colnames(pred_contr), c(colnames(sparse_matrix), "BIAS"))
pred <- predict(bst.GLM, sparse_matrix, outputmargin = TRUE)
expect_lt(max(abs(rowSums(pred_contr) - pred)), 1e-5)
# manual calculation of linear terms
coefs <- xgb.dump(bst.GLM)[-c(1,2,4)] %>% as.numeric
coefs <- c(coefs[-1], coefs[1]) # intercept must be the last
pred_contr_manual <- sweep(cbind(sparse_matrix, 1), 2, coefs, FUN="*")
expect_equal(as.numeric(pred_contr), as.numeric(pred_contr_manual), 1e-5)
# gbtree multiclass
pred <- predict(mbst.Tree, as.matrix(iris[, -5]), outputmargin = TRUE, reshape = TRUE)
pred_contr <- predict(mbst.Tree, as.matrix(iris[, -5]), predcontrib = TRUE)
expect_is(pred_contr, "list")
expect_length(pred_contr, 3)
for (g in seq_along(pred_contr)) {
expect_equal(colnames(pred_contr[[g]]), c(colnames(iris[, -5]), "BIAS"))
expect_lt(max(abs(rowSums(pred_contr[[g]]) - pred[, g])), 1e-5)
}
# gblinear multiclass (set base_score = 0, which is base margin in multiclass)
@@ -192,10 +199,10 @@ test_that("xgb.model.dt.tree works with and without feature names", {
expect_equal(names.dt.trees, names(dt.tree))
expect_equal(dim(dt.tree), c(188, 10))
expect_output(str(dt.tree), 'Feature.*\\"Age\\"')
dt.tree.0 <- xgb.model.dt.tree(model = bst.Tree)
expect_equal(dt.tree, dt.tree.0)
# when model contains no feature names:
bst.Tree.x <- bst.Tree
bst.Tree.x$feature_names <- NULL
@@ -219,20 +226,20 @@ test_that("xgb.importance works with and without feature names", {
expect_equal(dim(importance.Tree), c(7, 4))
expect_equal(colnames(importance.Tree), c("Feature", "Gain", "Cover", "Frequency"))
expect_output(str(importance.Tree), 'Feature.*\\"Age\\"')
importance.Tree.0 <- xgb.importance(model = bst.Tree)
expect_equal(importance.Tree, importance.Tree.0)
# when model contains no feature names:
bst.Tree.x <- bst.Tree
bst.Tree.x$feature_names <- NULL
importance.Tree.x <- xgb.importance(model = bst.Tree.x)
expect_equal(importance.Tree[, -1, with=FALSE], importance.Tree.x[, -1, with=FALSE])
imp2plot <- xgb.plot.importance(importance_matrix = importance.Tree)
expect_equal(colnames(imp2plot), c("Feature", "Gain", "Cover", "Frequency", "Importance"))
xgb.ggplot.importance(importance_matrix = importance.Tree)
# for multiclass
imp.Tree <- xgb.importance(model = mbst.Tree)
expect_equal(dim(imp.Tree), c(4, 4))
@@ -247,7 +254,7 @@ test_that("xgb.importance works with GLM model", {
imp2plot <- xgb.plot.importance(importance.GLM)
expect_equal(colnames(imp2plot), c("Feature", "Weight", "Importance"))
xgb.ggplot.importance(importance.GLM)
# for multiclass
imp.GLM <- xgb.importance(model = mbst.GLM)
expect_equal(dim(imp.GLM), c(12, 3))
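Finally, the multiclass return shape that the contribution tests above rely on can be seen in a short sketch; the iris setup mirrors the tests, but the exact training parameters here are assumptions:

lb <- as.numeric(iris$Species) - 1
mbst <- xgboost(data = as.matrix(iris[, -5]), label = lb, max_depth = 3,
                eta = 0.5, nthread = 2, nrounds = 5,
                objective = "multi:softprob", num_class = 3)
# a list with one contribution matrix per class
contr <- predict(mbst, as.matrix(iris[, -5]), predcontrib = TRUE)
length(contr)    # num_class elements, one contribution matrix per class
dim(contr[[1]])  # 150 x 5: four features plus the BIAS column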