[R] On-demand serialization + standardization of attributes (#9924)

--------- Co-authored-by: Jiaming Yuan <jm.yuan@outlook.com>
2024-01-10 22:08:42 +01:00
parent 01c4711556
commit d3a8d284ab
64 changed files with 1773 additions and 1281 deletions
--- a/R-package/R/callbacks.R
+++ b/R-package/R/callbacks.R
@@ -228,7 +228,7 @@ cb.reset.parameters <- function(new_params) {
    })

    if (!is.null(env$bst)) {
-      xgb.parameters(env$bst$handle) <- pars
+      xgb.parameters(env$bst) <- pars
    } else {
      for (fd in env$bst_folds)
        xgb.parameters(fd$bst) <- pars
@@ -333,13 +333,13 @@ cb.early.stop <- function(stopping_rounds, maximize = FALSE,
    if (!is.null(env$bst)) {
      if (!inherits(env$bst, 'xgb.Booster'))
        stop("'bst' in the parent frame must be an 'xgb.Booster'")
-      if (!is.null(best_score <- xgb.attr(env$bst$handle, 'best_score'))) {
+      if (!is.null(best_score <- xgb.attr(env$bst, 'best_score'))) {
        best_score <<- as.numeric(best_score)
-        best_iteration <<- as.numeric(xgb.attr(env$bst$handle, 'best_iteration')) + 1
-        best_msg <<- as.numeric(xgb.attr(env$bst$handle, 'best_msg'))
+        best_iteration <<- as.numeric(xgb.attr(env$bst, 'best_iteration')) + 1
+        best_msg <<- as.numeric(xgb.attr(env$bst, 'best_msg'))
      } else {
-        xgb.attributes(env$bst$handle) <- list(best_iteration = best_iteration - 1,
-                                               best_score = best_score)
+        xgb.attributes(env$bst) <- list(best_iteration = best_iteration - 1,
+                                        best_score = best_score)
      }
    } else if (is.null(env$bst_folds) || is.null(env$basket)) {
      stop("Parent frame has neither 'bst' nor ('bst_folds' and 'basket')")
@@ -348,7 +348,7 @@ cb.early.stop <- function(stopping_rounds, maximize = FALSE,

  finalizer <- function(env) {
    if (!is.null(env$bst)) {
-      attr_best_score <- as.numeric(xgb.attr(env$bst$handle, 'best_score'))
+      attr_best_score <- as.numeric(xgb.attr(env$bst, 'best_score'))
      if (best_score != attr_best_score) {
        # If the difference is too big, throw an error
        if (abs(best_score - attr_best_score) >= 1e-14) {
@@ -358,9 +358,9 @@ cb.early.stop <- function(stopping_rounds, maximize = FALSE,
        # If the difference is due to floating-point truncation, update best_score
        best_score <- attr_best_score
      }
-      env$bst$best_iteration <- best_iteration
-      env$bst$best_ntreelimit <- best_ntreelimit
-      env$bst$best_score <- best_score
+      xgb.attr(env$bst, "best_iteration") <- best_iteration
+      xgb.attr(env$bst, "best_ntreelimit") <- best_ntreelimit
+      xgb.attr(env$bst, "best_score") <- best_score
    } else {
      env$basket$best_iteration <- best_iteration
      env$basket$best_ntreelimit <- best_ntreelimit
@@ -412,11 +412,15 @@ cb.early.stop <- function(stopping_rounds, maximize = FALSE,
 #' @param save_period save the model to disk after every
 #'        \code{save_period} iterations; 0 means save the model at the end.
 #' @param save_name the name or path for the saved model file.
+#'
+#'        Note that the format of the model being saved is determined by the file
+#'        extension specified here (see \link{xgb.save} for details about how it works).
+#'
 #'        It can contain a \code{\link[base]{sprintf}} formatting specifier
 #'        to include the integer iteration number in the file name.
-#'        E.g., with \code{save_name} = 'xgboost_%04d.model',
-#'        the file saved at iteration 50 would be named "xgboost_0050.model".
-#'
+#'        E.g., with \code{save_name} = 'xgboost_%04d.ubj',
+#'        the file saved at iteration 50 would be named "xgboost_0050.ubj".
+#' @seealso \link{xgb.save}
 #' @details
 #' This callback function allows to save an xgb-model file, either periodically after each \code{save_period}'s or at the end.
 #'
@@ -430,7 +434,7 @@ cb.early.stop <- function(stopping_rounds, maximize = FALSE,
 #' \code{\link{callbacks}}
 #'
 #' @export
-cb.save.model <- function(save_period = 0, save_name = "xgboost.model") {
+cb.save.model <- function(save_period = 0, save_name = "xgboost.ubj") {

  if (save_period < 0)
    stop("'save_period' cannot be negative")
@@ -440,8 +444,13 @@ cb.save.model <- function(save_period = 0, save_name = "xgboost.model") {
      stop("'save_model' callback requires the 'bst' booster object in its calling frame")

    if ((save_period > 0 && (env$iteration - env$begin_iteration) %% save_period == 0) ||
-        (save_period == 0 && env$iteration == env$end_iteration))
-      xgb.save(env$bst, sprintf(save_name, env$iteration))
+        (save_period == 0 && env$iteration == env$end_iteration)) {
+      # Note: this throws a warning if the name doesn't have anything to format through 'sprintf'
+      suppressWarnings({
+        save_name <- sprintf(save_name, env$iteration)
+      })
+      xgb.save(env$bst, save_name)
+    }
  }
  attr(callback, 'call') <- match.call()
  attr(callback, 'name') <- 'cb.save.model'
@@ -512,8 +521,7 @@ cb.cv.predict <- function(save_models = FALSE) {
    env$basket$pred <- pred
    if (save_models) {
      env$basket$models <- lapply(env$bst_folds, function(fd) {
-        xgb.attr(fd$bst, 'niter') <- env$end_iteration - 1
-        xgb.Booster.complete(xgb.handleToBooster(handle = fd$bst, raw = NULL), saveraw = TRUE)
+        return(fd$bst)
      })
    }
  }
@@ -665,7 +673,7 @@ cb.gblinear.history <- function(sparse = FALSE) {
    } else { # xgb.cv:
      cf <- vector("list", length(env$bst_folds))
      for (i in seq_along(env$bst_folds)) {
-        dmp <- xgb.dump(xgb.handleToBooster(handle = env$bst_folds[[i]]$bst, raw = NULL))
+        dmp <- xgb.dump(env$bst_folds[[i]]$bst)
        cf[[i]] <- as.numeric(grep('(booster|bias|weigh)', dmp, invert = TRUE, value = TRUE))
        if (sparse) cf[[i]] <- as(cf[[i]], "sparseVector")
      }
@@ -685,14 +693,19 @@ cb.gblinear.history <- function(sparse = FALSE) {
  callback
 }

-#' Extract gblinear coefficients history.
-#'
-#' A helper function to extract the matrix of linear coefficients' history
+#' @title Extract gblinear coefficients history.
+#' @description A helper function to extract the matrix of linear coefficients' history
 #' from a gblinear model created while using the \code{cb.gblinear.history()}
 #' callback.
+#' @details Note that this is an R-specific function that relies on R attributes that
+#' are not saved when using xgboost's own serialization functions like \link{xgb.load}
+#' or \link{xgb.load.raw}.
 #'
+#' In order for a serialized model to be accepted by tgis function, one must use R
+#' serializers such as \link{saveRDS}.
 #' @param model either an \code{xgb.Booster} or a result of \code{xgb.cv()}, trained
-#'        using the \code{cb.gblinear.history()} callback.
+#'        using the \code{cb.gblinear.history()} callback, but \bold{not} a booster
+#'        loaded from \link{xgb.load} or \link{xgb.load.raw}.
 #' @param class_index zero-based class index to extract the coefficients for only that
 #'        specific class in a multinomial multiclass model. When it is NULL, all the
 #'        coefficients are returned. Has no effect in non-multiclass models.
@@ -713,20 +726,18 @@ xgb.gblinear.history <- function(model, class_index = NULL) {
    stop("model must be an object of either xgb.Booster or xgb.cv.synchronous class")
  is_cv <- inherits(model, "xgb.cv.synchronous")

-  if (is.null(model[["callbacks"]]) || is.null(model$callbacks[["cb.gblinear.history"]]))
+  if (is_cv) {
+    callbacks <- model$callbacks
+  } else {
+    callbacks <- attributes(model)$callbacks
+  }
+
+  if (is.null(callbacks) || is.null(callbacks$cb.gblinear.history))
    stop("model must be trained while using the cb.gblinear.history() callback")

  if (!is_cv) {
-    # extract num_class & num_feat from the internal model
-    dmp <- xgb.dump(model)
-    if (length(dmp) < 2 || dmp[2] != "bias:")
-      stop("It does not appear to be a gblinear model")
-    dmp <- dmp[-c(1, 2)]
-    n <- which(dmp == 'weight:')
-    if (length(n) != 1)
-      stop("It does not appear to be a gblinear model")
-    num_class <- n - 1
-    num_feat <- (length(dmp) - 4) / num_class
+    num_class <- xgb.num_class(model)
+    num_feat <- xgb.num_feature(model)
  } else {
    # in case of CV, the object is expected to have this info
    if (model$params$booster != "gblinear")
@@ -742,7 +753,7 @@ xgb.gblinear.history <- function(model, class_index = NULL) {
      (class_index[1] < 0 || class_index[1] >= num_class))
    stop("class_index has to be within [0,", num_class - 1, "]")

-  coef_path <- environment(model$callbacks$cb.gblinear.history)[["coefs"]]
+  coef_path <- environment(callbacks$cb.gblinear.history)[["coefs"]]
  if (!is.null(class_index) && num_class > 1) {
    coef_path <- if (is.list(coef_path)) {
      lapply(coef_path,
--- a/R-package/R/utils.R
+++ b/R-package/R/utils.R
@@ -148,19 +148,17 @@ check.custom.eval <- function(env = parent.frame()) {


 # Update a booster handle for an iteration with dtrain data
-xgb.iter.update <- function(booster_handle, dtrain, iter, obj) {
-  if (!identical(class(booster_handle), "xgb.Booster.handle")) {
-    stop("booster_handle must be of xgb.Booster.handle class")
-  }
+xgb.iter.update <- function(bst, dtrain, iter, obj) {
  if (!inherits(dtrain, "xgb.DMatrix")) {
    stop("dtrain must be of xgb.DMatrix class")
  }
+  handle <- xgb.get.handle(bst)

  if (is.null(obj)) {
-    .Call(XGBoosterUpdateOneIter_R, booster_handle, as.integer(iter), dtrain)
+    .Call(XGBoosterUpdateOneIter_R, handle, as.integer(iter), dtrain)
  } else {
    pred <- predict(
-      booster_handle,
+      bst,
      dtrain,
      outputmargin = TRUE,
      training = TRUE,
@@ -185,7 +183,7 @@ xgb.iter.update <- function(booster_handle, dtrain, iter, obj) {
    }

    .Call(
-      XGBoosterTrainOneIter_R, booster_handle, dtrain, iter, grad, hess
+      XGBoosterTrainOneIter_R, handle, dtrain, iter, grad, hess
    )
  }
  return(TRUE)
@@ -195,23 +193,22 @@ xgb.iter.update <- function(booster_handle, dtrain, iter, obj) {
 # Evaluate one iteration.
 # Returns a named vector of evaluation metrics
 # with the names in a 'datasetname-metricname' format.
-xgb.iter.eval <- function(booster_handle, watchlist, iter, feval) {
-  if (!identical(class(booster_handle), "xgb.Booster.handle"))
-    stop("class of booster_handle must be xgb.Booster.handle")
+xgb.iter.eval <- function(bst, watchlist, iter, feval) {
+  handle <- xgb.get.handle(bst)

  if (length(watchlist) == 0)
    return(NULL)

  evnames <- names(watchlist)
  if (is.null(feval)) {
-    msg <- .Call(XGBoosterEvalOneIter_R, booster_handle, as.integer(iter), watchlist, as.list(evnames))
+    msg <- .Call(XGBoosterEvalOneIter_R, handle, as.integer(iter), watchlist, as.list(evnames))
    mat <- matrix(strsplit(msg, '\\s+|:')[[1]][-1], nrow = 2)
    res <- structure(as.numeric(mat[2, ]), names = mat[1, ])
  } else {
    res <- sapply(seq_along(watchlist), function(j) {
      w <- watchlist[[j]]
      ## predict using all trees
-      preds <- predict(booster_handle, w, outputmargin = TRUE, iterationrange = c(1, 1))
+      preds <- predict(bst, w, outputmargin = TRUE, iterationrange = c(1, 1))
      eval_res <- feval(preds, w)
      out <- eval_res$value
      names(out) <- paste0(evnames[j], "-", eval_res$metric)
@@ -352,16 +349,45 @@ xgb.createFolds <- function(y, k) {
 #' @name xgboost-deprecated
 NULL

-#' Do not use \code{\link[base]{saveRDS}} or \code{\link[base]{save}} for long-term archival of
-#' models. Instead, use \code{\link{xgb.save}} or \code{\link{xgb.save.raw}}.
+#' @title Model Serialization and Compatibility
+#' @description
 #'
-#' It is a common practice to use the built-in \code{\link[base]{saveRDS}} function (or
-#' \code{\link[base]{save}}) to persist R objects to the disk. While it is possible to persist
-#' \code{xgb.Booster} objects using \code{\link[base]{saveRDS}}, it is not advisable to do so if
-#' the model is to be accessed in the future. If you train a model with the current version of
-#' XGBoost and persist it with \code{\link[base]{saveRDS}}, the model is not guaranteed to be
-#' accessible in later releases of XGBoost. To ensure that your model can be accessed in future
-#' releases of XGBoost, use \code{\link{xgb.save}} or \code{\link{xgb.save.raw}} instead.
+#' When it comes to serializing XGBoost models, it's possible to use R serializers such as
+#' \link{save} or \link{saveRDS} to serialize an XGBoost R model, but XGBoost also provides
+#' its own serializers with better compatibility guarantees, which allow loading
+#' said models in other language bindings of XGBoost.
+#'
+#' Note that an `xgb.Booster` object, outside of its core components, might also keep:\itemize{
+#' \item Additional model configuration (accessible through \link{xgb.config}),
+#' which includes model fitting parameters like `max_depth` and runtime parameters like `nthread`.
+#' These are not necessarily useful for prediction/importance/plotting.
+#' \item Additional R-specific attributes  - e.g. results of callbacks, such as evaluation logs,
+#' which are kept as a `data.table` object, accessible through `attributes(model)$evaluation_log`
+#' if present.
+#' }
+#'
+#' The first one (configurations) does not have the same compatibility guarantees as
+#' the model itself, including attributes that are set and accessed through \link{xgb.attributes} - that is, such configuration
+#' might be lost after loading the booster in a different XGBoost version, regardless of the
+#' serializer that was used. These are saved when using \link{saveRDS}, but will be discarded
+#' if loaded into an incompatible XGBoost version. They are not saved when using XGBoost's
+#' serializers from its public interface including \link{xgb.save} and \link{xgb.save.raw}.
+#'
+#' The second ones (R attributes) are not part of the standard XGBoost model structure, and thus are
+#' not saved when using XGBoost's own serializers. These attributes are only used for informational
+#' purposes, such as keeping track of evaluation metrics as the model was fit, or saving the R
+#' call that produced the model, but are otherwise not used for prediction / importance / plotting / etc.
+#' These R attributes are only preserved when using R's serializers.
+#'
+#' Note that XGBoost models in R starting from version `2.1.0` and onwards, and XGBoost models
+#' before version `2.1.0`; have a very different R object structure and are incompatible with
+#' each other. Hence, models that were saved with R serializers live `saveRDS` or `save` before
+#' version `2.1.0` will not work with latter `xgboost` versions and vice versa. Be aware that
+#' the structure of R model objects could in theory change again in the future, so XGBoost's serializers
+#' should be preferred for long-term storage.
+#'
+#' Furthermore, note that using the package `qs` for serialization will require version 0.26 or
+#' higher of said package, and will have the same compatibility restrictions as R serializers.
 #'
 #' @details
 #' Use \code{\link{xgb.save}} to save the XGBoost model as a stand-alone file. You may opt into
@@ -374,9 +400,10 @@ NULL
 #' The \code{\link{xgb.save.raw}} function is useful if you'd like to persist the XGBoost model
 #' as part of another R object.
 #'
-#' Note: Do not use \code{\link{xgb.serialize}} to store models long-term. It persists not only the
-#' model but also internal configurations and parameters, and its format is not stable across
-#' multiple XGBoost versions. Use \code{\link{xgb.serialize}} only for checkpointing.
+#' Use \link{saveRDS} if you require the R-specific attributes that a booster might have, such
+#' as evaluation logs, but note that future compatibility of such objects is outside XGBoost's
+#' control as it relies on R's serialization format (see e.g. the details section in
+#' \link{serialize} and \link{save} from base R).
 #'
 #' For more details and explanation about model persistence and archival, consult the page
 #' \url{https://xgboost.readthedocs.io/en/latest/tutorials/saving_model.html}.
--- a/R-package/R/xgb.Booster.R
+++ b/R-package/R/xgb.Booster.R
@@ -1,180 +1,85 @@
-# Construct an internal xgboost Booster and return a handle to it.
+# Construct an internal xgboost Booster and get its current number of rounds.
 # internal utility function
-xgb.Booster.handle <- function(params, cachelist, modelfile, handle) {
+# Note: the number of rounds in the C booster gets reset to zero when changing
+# key booster parameters like 'process_type=update', but in some cases, when
+# replacing previous iterations, it needs to make a check that the new number
+# of iterations doesn't exceed the previous ones, hence it keeps track of the
+# current number of iterations before resetting the parameters in order to
+# perform the check later on.
+xgb.Booster <- function(params, cachelist, modelfile) {
  if (typeof(cachelist) != "list" ||
      !all(vapply(cachelist, inherits, logical(1), what = 'xgb.DMatrix'))) {
    stop("cachelist must be a list of xgb.DMatrix objects")
  }
  ## Load existing model, dispatch for on disk model file and in memory buffer
  if (!is.null(modelfile)) {
-    if (typeof(modelfile) == "character") {
+    if (is.character(modelfile)) {
      ## A filename
-      handle <- .Call(XGBoosterCreate_R, cachelist)
+      bst <- .Call(XGBoosterCreate_R, cachelist)
      modelfile <- path.expand(modelfile)
-      .Call(XGBoosterLoadModel_R, handle, enc2utf8(modelfile[1]))
-      class(handle) <- "xgb.Booster.handle"
+      .Call(XGBoosterLoadModel_R, xgb.get.handle(bst), enc2utf8(modelfile[1]))
+      niter <- xgb.get.num.boosted.rounds(bst)
      if (length(params) > 0) {
-        xgb.parameters(handle) <- params
+        xgb.parameters(bst) <- params
      }
-      return(handle)
-    } else if (typeof(modelfile) == "raw") {
+      return(list(bst = bst, niter = niter))
+    } else if (is.raw(modelfile)) {
      ## A memory buffer
-      bst <- xgb.unserialize(modelfile, handle)
+      bst <- xgb.load.raw(modelfile)
+      niter <- xgb.get.num.boosted.rounds(bst)
      xgb.parameters(bst) <- params
-      return(bst)
+      return(list(bst = bst, niter = niter))
    } else if (inherits(modelfile, "xgb.Booster")) {
      ## A booster object
-      bst <- xgb.Booster.complete(modelfile, saveraw = TRUE)
-      bst <- xgb.unserialize(bst$raw)
+      bst <- .Call(XGDuplicate_R, modelfile)
+      niter <- xgb.get.num.boosted.rounds(bst)
      xgb.parameters(bst) <- params
-      return(bst)
+      return(list(bst = bst, niter = niter))
    } else {
      stop("modelfile must be either character filename, or raw booster dump, or xgb.Booster object")
    }
  }
  ## Create new model
-  handle <- .Call(XGBoosterCreate_R, cachelist)
-  class(handle) <- "xgb.Booster.handle"
+  bst <- .Call(XGBoosterCreate_R, cachelist)
  if (length(params) > 0) {
-    xgb.parameters(handle) <- params
+    xgb.parameters(bst) <- params
  }
-  return(handle)
+  return(list(bst = bst, niter = 0L))
 }

-# Convert xgb.Booster.handle to xgb.Booster
-# internal utility function
-xgb.handleToBooster <- function(handle, raw) {
-  bst <- list(handle = handle, raw = raw)
-  class(bst) <- "xgb.Booster"
-  return(bst)
-}
-
-# Check whether xgb.Booster.handle is null
+# Check whether xgb.Booster handle is null
 # internal utility function
 is.null.handle <- function(handle) {
  if (is.null(handle)) return(TRUE)

-  if (!identical(class(handle), "xgb.Booster.handle"))
-    stop("argument type must be xgb.Booster.handle")
+  if (!inherits(handle, "externalptr"))
+    stop("argument type must be 'externalptr'")

-  if (.Call(XGCheckNullPtr_R, handle))
-    return(TRUE)
-
-  return(FALSE)
+  return(.Call(XGCheckNullPtr_R, handle))
 }

-# Return a verified to be valid handle out of either xgb.Booster.handle or
-# xgb.Booster internal utility function
+# Return a verified to be valid handle out of xgb.Booster
+# internal utility function
 xgb.get.handle <- function(object) {
  if (inherits(object, "xgb.Booster")) {
-    handle <- object$handle
-  } else if (inherits(object, "xgb.Booster.handle")) {
-    handle <- object
+    handle <- object$ptr
+    if (is.null(handle) || !inherits(handle, "externalptr")) {
+      stop("'xgb.Booster' object is corrupted or is from an incompatible xgboost version.")
+    }
  } else {
-    stop("argument must be of either xgb.Booster or xgb.Booster.handle class")
+    stop("argument must be an 'xgb.Booster' object.")
  }
  if (is.null.handle(handle)) {
-    stop("invalid xgb.Booster.handle")
+    stop("invalid 'xgb.Booster' (blank 'externalptr').")
  }
-  handle
-}
-
-#' Restore missing parts of an incomplete xgb.Booster object
-#'
-#' It attempts to complete an `xgb.Booster` object by restoring either its missing
-#' raw model memory dump (when it has no `raw` data but its `xgb.Booster.handle` is valid)
-#' or its missing internal handle (when its `xgb.Booster.handle` is not valid
-#' but it has a raw Booster memory dump).
-#'
-#' @param object Object of class `xgb.Booster`.
-#' @param saveraw A flag indicating whether to append `raw` Booster memory dump data
-#'                when it doesn't already exist.
-#'
-#' @details
-#'
-#' While this method is primarily for internal use, it might be useful in some practical situations.
-#'
-#' E.g., when an `xgb.Booster` model is saved as an R object and then is loaded as an R object,
-#' its handle (pointer) to an internal xgboost model would be invalid. The majority of xgboost methods
-#' should still work for such a model object since those methods would be using
-#' `xgb.Booster.complete()` internally. However, one might find it to be more efficient to call the
-#' `xgb.Booster.complete()` function explicitly once after loading a model as an R-object.
-#' That would prevent further repeated implicit reconstruction of an internal booster model.
-#'
-#' @return
-#' An object of `xgb.Booster` class.
-#'
-#' @examples
-#'
-#' data(agaricus.train, package = "xgboost")
-#'
-#' bst <- xgboost(
-#'   data = agaricus.train$data,
-#'   label = agaricus.train$label,
-#'   max_depth = 2,
-#'   eta = 1,
-#'   nthread = 2,
-#'   nrounds = 2,
-#'   objective = "binary:logistic"
-#' )
-#'
-#' fname <- file.path(tempdir(), "xgb_model.Rds")
-#' saveRDS(bst, fname)
-#'
-#' # Warning: The resulting RDS file is only compatible with the current XGBoost version.
-#' # Refer to the section titled "a-compatibility-note-for-saveRDS-save".
-#' bst1 <- readRDS(fname)
-#' # the handle is invalid:
-#' print(bst1$handle)
-#'
-#' bst1 <- xgb.Booster.complete(bst1)
-#' # now the handle points to a valid internal booster model:
-#' print(bst1$handle)
-#'
-#' @export
-xgb.Booster.complete <- function(object, saveraw = TRUE) {
-  if (!inherits(object, "xgb.Booster"))
-    stop("argument type must be xgb.Booster")
-
-  if (is.null.handle(object$handle)) {
-    object$handle <- xgb.Booster.handle(
-      params = list(),
-      cachelist = list(),
-      modelfile = object$raw,
-      handle = object$handle
-    )
-  } else {
-    if (is.null(object$raw) && saveraw) {
-      object$raw <- xgb.serialize(object$handle)
-    }
-  }
-
-  attrs <- xgb.attributes(object)
-  if (!is.null(attrs$best_ntreelimit)) {
-    object$best_ntreelimit <- as.integer(attrs$best_ntreelimit)
-  }
-  if (!is.null(attrs$best_iteration)) {
-    ## Convert from 0 based back to 1 based.
-    object$best_iteration <- as.integer(attrs$best_iteration) + 1
-  }
-  if (!is.null(attrs$best_score)) {
-    object$best_score <- as.numeric(attrs$best_score)
-  }
-  if (!is.null(attrs$best_msg)) {
-    object$best_msg <- attrs$best_msg
-  }
-  if (!is.null(attrs$niter)) {
-    object$niter <- as.integer(attrs$niter)
-  }
-
-  return(object)
+  return(handle)
 }

 #' Predict method for XGBoost model
 #'
 #' Predicted values based on either xgboost model or model handle object.
 #'
-#' @param object Object of class `xgb.Booster` or `xgb.Booster.handle`.
+#' @param object Object of class `xgb.Booster`.
 #' @param newdata Takes `matrix`, `dgCMatrix`, `dgRMatrix`, `dsparseVector`,
 #'        local data file, or `xgb.DMatrix`.
 #'        For single-row predictions on sparse data, it is recommended to use the CSR format.
@@ -358,27 +263,19 @@ xgb.Booster.complete <- function(object, saveraw = TRUE) {
 #' pred5 <- predict(bst, as.matrix(iris[, -5]), iterationrange = c(1, 6))
 #' sum(pred5 != lb) / length(lb)
 #'
-#' @rdname predict.xgb.Booster
 #' @export
 predict.xgb.Booster <- function(object, newdata, missing = NA, outputmargin = FALSE, ntreelimit = NULL,
                                predleaf = FALSE, predcontrib = FALSE, approxcontrib = FALSE, predinteraction = FALSE,
                                reshape = FALSE, training = FALSE, iterationrange = NULL, strict_shape = FALSE, ...) {
-  object <- xgb.Booster.complete(object, saveraw = FALSE)
-
  if (!inherits(newdata, "xgb.DMatrix")) {
-    config <- jsonlite::fromJSON(xgb.config(object))
-    nthread <- strtoi(config$learner$generic_param$nthread)
+    nthread <- xgb.nthread(object)
    newdata <- xgb.DMatrix(
      newdata,
      missing = missing, nthread = NVL(nthread, -1)
    )
  }
-  if (!is.null(object[["feature_names"]]) &&
-      !is.null(colnames(newdata)) &&
-      !identical(object[["feature_names"]], colnames(newdata)))
-    stop("Feature names stored in `object` and `newdata` are different!")

-  if (NVL(object$params[['booster']], '') == 'gblinear' || is.null(ntreelimit))
+  if (NVL(xgb.booster_type(object), '') == 'gblinear' || is.null(ntreelimit))
    ntreelimit <- 0

  if (ntreelimit != 0 && is.null(iterationrange)) {
@@ -391,11 +288,12 @@ predict.xgb.Booster <- function(object, newdata, missing = NA, outputmargin = FA
    ## both are specified, let libgxgboost throw an error
  } else {
    ## no limit is supplied, use best
-    if (is.null(object$best_iteration)) {
+    best_iteration <- xgb.best_iteration(object)
+    if (is.null(best_iteration)) {
      iterationrange <- c(0, 0)
    } else {
      ## We don't need to + 1 as R is 1-based index.
-      iterationrange <- c(0, as.integer(object$best_iteration))
+      iterationrange <- c(0, as.integer(best_iteration))
    }
  }
  ## Handle the 0 length values.
@@ -438,7 +336,10 @@ predict.xgb.Booster <- function(object, newdata, missing = NA, outputmargin = FA
  }

  predts <- .Call(
-    XGBoosterPredictFromDMatrix_R, object$handle, newdata, jsonlite::toJSON(args, auto_unbox = TRUE)
+    XGBoosterPredictFromDMatrix_R,
+    xgb.get.handle(object),
+    newdata,
+    jsonlite::toJSON(args, auto_unbox = TRUE)
  )
  names(predts) <- c("shape", "results")
  shape <- predts$shape
@@ -509,22 +410,12 @@ predict.xgb.Booster <- function(object, newdata, missing = NA, outputmargin = FA
  return(arr)
 }

-#' @rdname predict.xgb.Booster
-#' @export
-predict.xgb.Booster.handle <- function(object, ...) {

-  bst <- xgb.handleToBooster(handle = object, raw = NULL)
-
-  ret <- predict(bst, ...)
-  return(ret)
-}
-
-
-#' Accessors for serializable attributes of a model
+#' @title Accessors for serializable attributes of a model
 #'
-#' These methods allow to manipulate the key-value attribute strings of an xgboost model.
+#' @description These methods allow to manipulate the key-value attribute strings of an xgboost model.
 #'
-#' @param object Object of class `xgb.Booster` or `xgb.Booster.handle`.
+#' @param object Object of class `xgb.Booster`. \bold{Will be modified in-place} when assigning to it.
 #' @param name A non-empty character string specifying which attribute is to be accessed.
 #' @param value For `xgb.attr<-`, a value of an attribute; for `xgb.attributes<-`,
 #'        it is a list (or an object coercible to a list) with the names of attributes to set
@@ -546,16 +437,15 @@ predict.xgb.Booster.handle <- function(object, ...) {
 #' change the value of that parameter for a model.
 #' Use [xgb.parameters<-()] to set or change model parameters.
 #'
-#' The attribute setters would usually work more efficiently for `xgb.Booster.handle`
-#' than for `xgb.Booster`, since only just a handle (pointer) would need to be copied.
-#' That would only matter if attributes need to be set many times.
-#' Note, however, that when feeding a handle of an `xgb.Booster` object to the attribute setters,
-#' the raw model cache of an `xgb.Booster` object would not be automatically updated,
-#' and it would be the user's responsibility to call [xgb.serialize()] to update it.
-#'
 #' The `xgb.attributes<-` setter either updates the existing or adds one or several attributes,
 #' but it doesn't delete the other existing attributes.
 #'
+#' Important: since this modifies the booster's C object, semantics for assignment here
+#' will differ from R's, as any object reference to the same booster will be modified
+#' too, while assignment of R attributes through `attributes(model)$<attr> <- <value>`
+#' will follow the usual copy-on-write R semantics (see \link{xgb.copy.Booster} for an
+#' example of these behaviors).
+#'
 #' @return
 #' - `xgb.attr()` returns either a string value of an attribute
 #'   or `NULL` if an attribute wasn't stored in a model.
@@ -597,14 +487,25 @@ predict.xgb.Booster.handle <- function(object, ...) {
 xgb.attr <- function(object, name) {
  if (is.null(name) || nchar(as.character(name[1])) == 0) stop("invalid attribute name")
  handle <- xgb.get.handle(object)
-  .Call(XGBoosterGetAttr_R, handle, as.character(name[1]))
+  out <- .Call(XGBoosterGetAttr_R, handle, as.character(name[1]))
+  if (!NROW(out) || !nchar(out)) {
+    return(NULL)
+  }
+  if (!is.null(out)) {
+    if (name %in% c("best_iteration", "best_ntreelimit", "best_score")) {
+      out <- as.numeric(out)
+    }
+  }
+  return(out)
 }

 #' @rdname xgb.attr
 #' @export
 `xgb.attr<-` <- function(object, name, value) {
-  if (is.null(name) || nchar(as.character(name[1])) == 0) stop("invalid attribute name")
+  name <- as.character(name[1])
+  if (!NROW(name) || !nchar(name)) stop("invalid attribute name")
  handle <- xgb.get.handle(object)
+
  if (!is.null(value)) {
    # Coerce the elements to be scalar strings.
    # Q: should we warn user about non-scalar elements?
@@ -614,11 +515,8 @@ xgb.attr <- function(object, name) {
      value <- as.character(value[1])
    }
  }
-  .Call(XGBoosterSetAttr_R, handle, as.character(name[1]), value)
-  if (is(object, 'xgb.Booster') && !is.null(object$raw)) {
-    object$raw <- xgb.serialize(object$handle)
-  }
-  object
+  .Call(XGBoosterSetAttr_R, handle, name, value)
+  return(object)
 }

 #' @rdname xgb.attr
@@ -626,12 +524,10 @@ xgb.attr <- function(object, name) {
 xgb.attributes <- function(object) {
  handle <- xgb.get.handle(object)
  attr_names <- .Call(XGBoosterGetAttrNames_R, handle)
-  if (is.null(attr_names)) return(NULL)
-  res <- lapply(attr_names, function(x) {
-    .Call(XGBoosterGetAttr_R, handle, x)
-  })
-  names(res) <- attr_names
-  res
+  if (!NROW(attr_names)) return(list())
+  out <- lapply(attr_names, function(name) xgb.attr(object, name))
+  names(out) <- attr_names
+  return(out)
 }

 #' @rdname xgb.attr
@@ -641,31 +537,21 @@ xgb.attributes <- function(object) {
  if (is.null(names(a)) || any(nchar(names(a)) == 0)) {
    stop("attribute names cannot be empty strings")
  }
-  # Coerce the elements to be scalar strings.
-  # Q: should we warn a user about non-scalar elements?
-  a <- lapply(a, function(x) {
-    if (is.null(x)) return(NULL)
-    if (is.numeric(x[1])) {
-      format(x[1], digits = 17)
-    } else {
-      as.character(x[1])
-    }
-  })
-  handle <- xgb.get.handle(object)
  for (i in seq_along(a)) {
-    .Call(XGBoosterSetAttr_R, handle, names(a[i]), a[[i]])
+    xgb.attr(object, names(a[i])) <- a[[i]]
  }
-  if (is(object, 'xgb.Booster') && !is.null(object$raw)) {
-    object$raw <- xgb.serialize(object$handle)
-  }
-  object
+  return(object)
 }

-#' Accessors for model parameters as JSON string
-#'
-#' @param object Object of class `xgb.Booster`.
-#' @param value A JSON string.
+#' @title Accessors for model parameters as JSON string
+#' @details Note that assignment is performed in-place on the booster C object, which unlike assignment
+#' of R attributes, doesn't follow typical copy-on-write semantics for assignment - i.e. all references
+#' to the same booster will also get updated.
 #'
+#' See \link{xgb.copy.Booster} for an example of this behavior.
+#' @param object Object of class `xgb.Booster`. \bold{Will be modified in-place} when assigning to it.
+#' @param value An R list.
+#' @return `xgb.config` will return the parameters as an R list.
 #' @examples
 #' data(agaricus.train, package = "xgboost")
 #'
@@ -690,31 +576,36 @@ xgb.attributes <- function(object) {
 #' @export
 xgb.config <- function(object) {
  handle <- xgb.get.handle(object)
-  .Call(XGBoosterSaveJsonConfig_R, handle)
+  return(jsonlite::fromJSON(.Call(XGBoosterSaveJsonConfig_R, handle)))
 }

 #' @rdname xgb.config
 #' @export
 `xgb.config<-` <- function(object, value) {
  handle <- xgb.get.handle(object)
-  .Call(XGBoosterLoadJsonConfig_R, handle, value)
-  object$raw <- NULL  # force renew the raw buffer
-  object <- xgb.Booster.complete(object)
-  object
+  .Call(
+    XGBoosterLoadJsonConfig_R,
+    handle,
+    jsonlite::toJSON(value, auto_unbox = TRUE, null = "null")
+  )
+  return(object)
 }

-#' Accessors for model parameters
+#' @title Accessors for model parameters
+#' @description Only the setter for xgboost parameters is currently implemented.
+#' @details Just like \link{xgb.attr}, this function will make in-place modifications
+#' on the booster object which do not follow typical R assignment semantics - that is,
+#' all references to the same booster will also be updated, unlike assingment of R
+#' attributes which follow copy-on-write semantics.
 #'
-#' Only the setter for xgboost parameters is currently implemented.
+#' See \link{xgb.copy.Booster} for an example of this behavior.
 #'
-#' @param object Object of class `xgb.Booster` or `xgb.Booster.handle`.
+#' Be aware that setting parameters of a fitted booster related to training continuation / updates
+#' will reset its number of rounds indicator to zero.
+#' @param object Object of class `xgb.Booster`. \bold{Will be modified in-place}.
 #' @param value A list (or an object coercible to a list) with the names of parameters to set
 #'        and the elements corresponding to parameter values.
-#'
-#' @details
-#' Note that the setter would usually work more efficiently for `xgb.Booster.handle`
-#' than for `xgb.Booster`, since only just a handle would need to be copied.
-#'
+#' @return The same booster `object`, which gets modified in-place.
 #' @examples
 #' data(agaricus.train, package = "xgboost")
 #' train <- agaricus.train
@@ -751,28 +642,301 @@ xgb.config <- function(object) {
  for (i in seq_along(p)) {
    .Call(XGBoosterSetParam_R, handle, names(p[i]), p[[i]])
  }
-  if (is(object, 'xgb.Booster') && !is.null(object$raw)) {
-    object$raw <- xgb.serialize(object$handle)
+  return(object)
+}
+
+#' @rdname getinfo
+#' @export
+getinfo.xgb.Booster <- function(object, name) {
+  name <- as.character(head(name, 1L))
+  allowed_fields <- c("feature_name", "feature_type")
+  if (!(name %in% allowed_fields)) {
+    stop("getinfo: name must be one of the following: ", paste(allowed_fields, collapse = ", "))
  }
-  object
+  handle <- xgb.get.handle(object)
+  out <- .Call(
+    XGBoosterGetStrFeatureInfo_R,
+    handle,
+    name
+  )
+  if (!NROW(out)) {
+    return(NULL)
+  }
+  return(out)
+}
+
+#' @rdname getinfo
+#' @export
+setinfo.xgb.Booster <- function(object, name, info) {
+  name <- as.character(head(name, 1L))
+  allowed_fields <- c("feature_name", "feature_type")
+  if (!(name %in% allowed_fields)) {
+    stop("setinfo: unknown info name ", name)
+  }
+  info <- as.character(info)
+  handle <- xgb.get.handle(object)
+  .Call(
+    XGBoosterSetStrFeatureInfo_R,
+    handle,
+    name,
+    info
+  )
+  return(TRUE)
+}
+
+#' @title Get number of boosting in a fitted booster
+#' @param model A fitted `xgb.Booster` model.
+#' @return The number of rounds saved in the model, as an integer.
+#' @details Note that setting booster parameters related to training
+#' continuation / updates through \link{xgb.parameters<-} will reset the
+#' number of rounds to zero.
+#' @export
+xgb.get.num.boosted.rounds <- function(model) {
+  return(.Call(XGBoosterBoostedRounds_R, xgb.get.handle(model)))
+}
+
+#' @title Get Features Names from Booster
+#' @description Returns the feature / variable / column names from a fitted
+#' booster object, which are set automatically during the call to \link{xgb.train}
+#' from the DMatrix names, or which can be set manually through \link{setinfo}.
+#'
+#' If the object doesn't have feature names, will return `NULL`.
+#'
+#' It is equivalent to calling `getinfo(object, "feature_name")`.
+#' @param object An `xgb.Booster` object.
+#' @param ... Not used.
+#' @export
+variable.names.xgb.Booster <- function(object, ...) {
+  return(getinfo(object, "feature_name"))
 }

-# Extract the number of trees in a model.
-# TODO: either add a getter to C-interface, or simply set an 'ntree' attribute after each iteration.
-# internal utility function
 xgb.ntree <- function(bst) {
-  length(grep('^booster', xgb.dump(bst)))
+  config <- xgb.config(bst)
+  out <- strtoi(config$learner$gradient_booster$gbtree_model_param$num_trees)
+  return(out)
 }

+xgb.nthread <- function(bst) {
+  config <- xgb.config(bst)
+  out <- strtoi(config$learner$generic_param$nthread)
+  return(out)
+}

-#' Print xgb.Booster
+xgb.booster_type <- function(bst) {
+  config <- xgb.config(bst)
+  out <- config$learner$learner_train_param$booster
+  return(out)
+}
+
+xgb.num_class <- function(bst) {
+  config <- xgb.config(bst)
+  out <- strtoi(config$learner$learner_model_param$num_class)
+  return(out)
+}
+
+xgb.feature_names <- function(bst) {
+  return(getinfo(bst, "feature_name"))
+}
+
+xgb.feature_types <- function(bst) {
+  return(getinfo(bst, "feature_type"))
+}
+
+xgb.num_feature <- function(bst) {
+  handle <- xgb.get.handle(bst)
+  return(.Call(XGBoosterGetNumFeature_R, handle))
+}
+
+xgb.best_iteration <- function(bst) {
+  out <- xgb.attr(bst, "best_iteration")
+  if (!NROW(out) || !nchar(out)) {
+    out <- NULL
+  }
+  return(out)
+}
+
+#' @title Extract coefficients from linear booster
+#' @description Extracts the coefficients from a 'gblinear' booster object,
+#' as produced by \code{xgb.train} when using parameter `booster="gblinear"`.
 #'
-#' Print information about `xgb.Booster`.
+#' Note: this function will error out if passing a booster model
+#' which is not of "gblinear" type.
+#' @param object A fitted booster of 'gblinear' type.
+#' @param ... Not used.
+#' @return The extracted coefficients:\itemize{
+#' \item If there's only one coefficient per column in the data, will be returned as a
+#' vector, potentially containing the feature names if available, with the intercept
+#' as first column.
+#' \item If there's more than one coefficient per column in the data (e.g. when using
+#' `objective="multi:softmax"`), will be returned as a matrix with dimensions equal
+#' to `[num_features, num_cols]`, with the intercepts as first row. Note that the column
+#' (classes in multi-class classification) dimension will not be named.
+#' }
 #'
+#' The intercept returned here will include the 'base_score' parameter (unlike the 'bias'
+#' or the last coefficient in the model dump, which doesn't have 'base_score' added to it),
+#' hence one should get the same values from calling `predict(..., outputmargin = TRUE)` and
+#' from performing a matrix multiplication with `model.matrix(~., ...)`.
+#'
+#' Be aware that the coefficients are obtained by first converting them to strings and
+#' back, so there will always be some very small lose of precision compared to the actual
+#' coefficients as used by \link{predict.xgb.Booster}.
+#' @examples
+#' library(xgboost)
+#' data(mtcars)
+#' y <- mtcars[, 1]
+#' x <- as.matrix(mtcars[, -1])
+#' dm <- xgb.DMatrix(data = x, label = y, nthread = 1)
+#' params <- list(booster = "gblinear", nthread = 1)
+#' model <- xgb.train(data = dm, params = params, nrounds = 2)
+#' coef(model)
+#' @export
+coef.xgb.Booster <- function(object, ...) {
+  booster_type <- xgb.booster_type(object)
+  if (booster_type != "gblinear") {
+    stop("Coefficients are not defined for Booster type ", booster_type)
+  }
+  model_json <- jsonlite::fromJSON(rawToChar(xgb.save.raw(object, raw_format = "json")))
+  base_score <- model_json$learner$learner_model_param$base_score
+  num_feature <- as.numeric(model_json$learner$learner_model_param$num_feature)
+
+  weights <- model_json$learner$gradient_booster$model$weights
+  n_cols <- length(weights) / (num_feature + 1)
+  if (n_cols != floor(n_cols) || n_cols < 1) {
+    stop("Internal error: could not determine shape of coefficients.")
+  }
+  sep <- num_feature * n_cols
+  coefs <- weights[seq(1, sep)]
+  intercepts <- weights[seq(sep + 1, length(weights))]
+  intercepts <- intercepts + as.numeric(base_score)
+
+  feature_names <- xgb.feature_names(object)
+  if (!NROW(feature_names)) {
+    # This mimics the default naming in R which names columns as "V1..N"
+    # when names are needed but not available
+    feature_names <- paste0("V", seq(1L, num_feature))
+  }
+  feature_names <- c("(Intercept)", feature_names)
+  if (n_cols == 1L) {
+    out <- c(intercepts, coefs)
+    names(out) <- feature_names
+  } else {
+    coefs <- matrix(coefs, nrow = num_feature, byrow = TRUE)
+    dim(intercepts) <- c(1L, n_cols)
+    out <- rbind(intercepts, coefs)
+    row.names(out) <- feature_names
+    # TODO: if a class names attributes is added,
+    # should use those names here.
+  }
+  return(out)
+}
+
+#' @title Deep-copies a Booster Object
+#' @description Creates a deep copy of an 'xgb.Booster' object, such that the
+#' C object pointer contained will be a different object, and hence functions
+#' like \link{xgb.attr} will not affect the object from which it was copied.
+#' @param model An 'xgb.Booster' object.
+#' @return A deep copy of `model` - it will be identical in every way, but C-level
+#' functions called on that copy will not affect the `model` variable.
+#' @examples
+#' library(xgboost)
+#' data(mtcars)
+#' y <- mtcars$mpg
+#' x <- mtcars[, -1]
+#' dm <- xgb.DMatrix(x, label = y, nthread = 1)
+#' model <- xgb.train(
+#'   data = dm,
+#'   params = list(nthread = 1),
+#'   nround = 3
+#' )
+#'
+#' # Set an arbitrary attribute kept at the C level
+#' xgb.attr(model, "my_attr") <- 100
+#' print(xgb.attr(model, "my_attr"))
+#'
+#' # Just assigning to a new variable will not create
+#' # a deep copy - C object pointer is shared, and in-place
+#' # modifications will affect both objects
+#' model_shallow_copy <- model
+#' xgb.attr(model_shallow_copy, "my_attr") <- 333
+#' # 'model' was also affected by this change:
+#' print(xgb.attr(model, "my_attr"))
+#'
+#' model_deep_copy <- xgb.copy.Booster(model)
+#' xgb.attr(model_deep_copy, "my_attr") <- 444
+#' # 'model' was NOT affected by this change
+#' # (keeps previous value that was assigned before)
+#' print(xgb.attr(model, "my_attr"))
+#'
+#' # Verify that the new object was actually modified
+#' print(xgb.attr(model_deep_copy, "my_attr"))
+#' @export
+xgb.copy.Booster <- function(model) {
+  if (!inherits(model, "xgb.Booster")) {
+    stop("'model' must be an 'xgb.Booster' object.")
+  }
+  return(.Call(XGDuplicate_R, model))
+}
+
+#' @title Check if two boosters share the same C object
+#' @description Checks whether two booster objects refer to the same underlying C object.
+#' @details As booster objects (as returned by e.g. \link{xgb.train}) contain an R 'externalptr'
+#' object, they don't follow typical copy-on-write semantics of other R objects - that is, if
+#' one assigns a booster to a different variable and modifies that new variable through in-place
+#' methods like \link{xgb.attr<-}, the modification will be applied to both the old and the new
+#' variable, unlike typical R assignments which would only modify the latter.
+#'
+#' This function allows checking whether two booster objects share the same 'externalptr',
+#' regardless of the R attributes that they might have.
+#'
+#' In order to duplicate a booster in such a way that the copy wouldn't share the same
+#' 'externalptr', one can use function \link{xgb.copy.Booster}.
+#' @param obj1 Booster model to compare with `obj2`.
+#' @param obj2 Booster model to compare with `obj1`.
+#' @return Either `TRUE` or `FALSE` according to whether the two boosters share
+#' the underlying C object.
+#' @seealso \link{xgb.copy.Booster}
+#' @examples
+#' library(xgboost)
+#' data(mtcars)
+#' y <- mtcars$mpg
+#' x <- as.matrix(mtcars[, -1])
+#' model <- xgb.train(
+#'   params = list(nthread = 1),
+#'   data = xgb.DMatrix(x, label = y, nthread = 1),
+#'   nround = 3
+#' )
+#'
+#' model_shallow_copy <- model
+#' xgb.is.same.Booster(model, model_shallow_copy) # same C object
+#'
+#' model_deep_copy <- xgb.copy.Booster(model)
+#' xgb.is.same.Booster(model, model_deep_copy) # different C objects
+#'
+#' # In-place assignments modify all references,
+#' # but not full/deep copies of the booster
+#' xgb.attr(model_shallow_copy, "my_attr") <- 111
+#' xgb.attr(model, "my_attr") # gets modified
+#' xgb.attr(model_deep_copy, "my_attr") # doesn't get modified
+#' @export
+xgb.is.same.Booster <- function(obj1, obj2) {
+  if (!inherits(obj1, "xgb.Booster") || !inherits(obj2, "xgb.Booster")) {
+    stop("'xgb.is.same.Booster' is only applicable to 'xgb.Booster' objects.")
+  }
+  return(
+    .Call(
+      XGPointerEqComparison_R,
+      xgb.get.handle(obj1),
+      xgb.get.handle(obj2)
+    )
+  )
+}
+
+#' @title Print xgb.Booster
+#' @description Print information about `xgb.Booster`.
 #' @param x An `xgb.Booster` object.
-#' @param verbose Whether to print detailed data (e.g., attribute values).
-#' @param ... Not currently used.
-#'
+#' @param ... Not used.
+#' @return The same `x` object, returned invisibly
 #' @examples
 #' data(agaricus.train, package = "xgboost")
 #' train <- agaricus.train
@@ -790,79 +954,40 @@ xgb.ntree <- function(bst) {
 #' attr(bst, "myattr") <- "memo"
 #'
 #' print(bst)
-#' print(bst, verbose = TRUE)
 #'
 #' @export
-print.xgb.Booster <- function(x, verbose = FALSE, ...) {
+print.xgb.Booster <- function(x, ...) {
+  # this lets it error out when the object comes from an earlier R xgboost version
+  handle <- xgb.get.handle(x)
  cat('##### xgb.Booster\n')

-  valid_handle <- !is.null.handle(x$handle)
-  if (!valid_handle)
-    cat("Handle is invalid! Suggest using xgb.Booster.complete\n")
-
-  cat('raw: ')
-  if (!is.null(x$raw)) {
-    cat(format(object.size(x$raw), units = "auto"), '\n')
-  } else {
-    cat('NULL\n')
-  }
-  if (!is.null(x$call)) {
+  R_attrs <- attributes(x)
+  if (!is.null(R_attrs$call)) {
    cat('call:\n  ')
-    print(x$call)
+    print(R_attrs$call)
  }

-  if (!is.null(x$params)) {
-    cat('params (as set within xgb.train):\n')
-    cat('  ',
-         paste(names(x$params),
-               paste0('"', unlist(x$params), '"'),
-               sep = ' = ', collapse = ', '), '\n', sep = '')
-  }
-  # TODO: need an interface to access all the xgboosts parameters
+  cat('# of features:', xgb.num_feature(x), '\n')
+  cat('# of rounds: ', xgb.get.num.boosted.rounds(x), '\n')

-  attrs <- character(0)
-  if (valid_handle)
-    attrs <- xgb.attributes(x)
-  if (length(attrs) > 0) {
+  attr_names <- .Call(XGBoosterGetAttrNames_R, handle)
+  if (NROW(attr_names)) {
    cat('xgb.attributes:\n')
-    if (verbose) {
-        cat(paste(paste0('  ', names(attrs)),
-                  paste0('"', unlist(attrs), '"'),
-                  sep = ' = ', collapse = '\n'), '\n', sep = '')
-    } else {
-      cat('  ', paste(names(attrs), collapse = ', '), '\n', sep = '')
-    }
+    cat("  ", paste(attr_names, collapse = ", "), "\n")
  }

-  if (!is.null(x$callbacks) && length(x$callbacks) > 0) {
+  if (!is.null(R_attrs$callbacks) && length(R_attrs$callbacks) > 0) {
    cat('callbacks:\n')
-    lapply(callback.calls(x$callbacks), function(x) {
+    lapply(callback.calls(R_attrs$callbacks), function(x) {
      cat('  ')
      print(x)
    })
  }

-  if (!is.null(x$feature_names))
-    cat('# of features:', length(x$feature_names), '\n')
-
-  cat('niter: ', x$niter, '\n', sep = '')
-  # TODO: uncomment when faster xgb.ntree is implemented
-  #cat('ntree: ', xgb.ntree(x), '\n', sep='')
-
-  for (n in setdiff(names(x), c('handle', 'raw', 'call', 'params', 'callbacks',
-                                'evaluation_log', 'niter', 'feature_names'))) {
-    if (is.atomic(x[[n]])) {
-      cat(n, ':', x[[n]], '\n', sep = ' ')
-    } else {
-      cat(n, ':\n\t', sep = ' ')
-      print(x[[n]])
-    }
-  }
-
-  if (!is.null(x$evaluation_log)) {
+  if (!is.null(R_attrs$evaluation_log)) {
    cat('evaluation_log:\n')
-    print(x$evaluation_log, row.names = FALSE, topn = 2)
+    print(R_attrs$evaluation_log, row.names = FALSE, topn = 2)
  }

-  invisible(x)
+  return(invisible(x))
 }
--- a/R-package/R/xgb.DMatrix.R
+++ b/R-package/R/xgb.DMatrix.R
@@ -335,14 +335,13 @@ dimnames.xgb.DMatrix <- function(x) {
 }


-#' Get information of an xgb.DMatrix object
-#'
-#' Get information of an xgb.DMatrix object
-#' @param object Object of class \code{xgb.DMatrix}
+#' @title Get or set information of xgb.DMatrix and xgb.Booster objects
+#' @param object Object of class \code{xgb.DMatrix} of `xgb.Booster`.
 #' @param name the name of the information field to get (see details)
-#'
+#' @return For `getinfo`, will return the requested field. For `setinfo`, will always return value `TRUE`
+#' if it succeeds.
 #' @details
-#' The \code{name} field can be one of the following:
+#' The \code{name} field can be one of the following for `xgb.DMatrix`:
 #'
 #' \itemize{
 #'     \item \code{label}
@@ -357,9 +356,17 @@ dimnames.xgb.DMatrix <- function(x) {
 #' }
 #' See the documentation for \link{xgb.DMatrix} for more information about these fields.
 #'
+#' For `xgb.Booster`, can be one of the following:
+#' \itemize{
+#'     \item \code{feature_type}
+#'     \item \code{feature_name}
+#' }
+#'
 #' Note that, while 'qid' cannot be retrieved, it's possible to get the equivalent 'group'
 #' for a DMatrix that had 'qid' assigned.
 #'
+#' \bold{Important}: when calling `setinfo`, the objects are modified in-place. See
+#' \link{xgb.copy.Booster} for an idea of this in-place assignment works.
 #' @examples
 #' data(agaricus.train, package='xgboost')
 #' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
@@ -412,13 +419,7 @@ getinfo.xgb.DMatrix <- function(object, name) {
  return(ret)
 }

-
-#' Set information of an xgb.DMatrix object
-#'
-#' Set information of an xgb.DMatrix object
-#'
-#' @param object Object of class "xgb.DMatrix"
-#' @param name the name of the field to get
+#' @rdname getinfo
 #' @param info the specific field of information to set
 #'
 #' @details
@@ -441,11 +442,10 @@ getinfo.xgb.DMatrix <- function(object, name) {
 #' setinfo(dtrain, 'label', 1-labels)
 #' labels2 <- getinfo(dtrain, 'label')
 #' stopifnot(all.equal(labels2, 1-labels))
-#' @rdname setinfo
 #' @export
 setinfo <- function(object, name, info) UseMethod("setinfo")

-#' @rdname setinfo
+#' @rdname getinfo
 #' @export
 setinfo.xgb.DMatrix <- function(object, name, info) {
  .internal.setinfo.xgb.DMatrix(object, name, info)
--- a/R-package/R/xgb.cv.R
+++ b/R-package/R/xgb.cv.R
@@ -204,13 +204,13 @@ xgb.cv <- function(params = list(), data, nrounds, nfold, label = NULL, missing
       dtrain <- slice(dall, unlist(folds[-k]))
    else
       dtrain <- slice(dall, train_folds[[k]])
-    handle <- xgb.Booster.handle(
+    bst <- xgb.Booster(
      params = params,
      cachelist = list(dtrain, dtest),
-      modelfile = NULL,
-      handle = NULL
+      modelfile = NULL
    )
-    list(dtrain = dtrain, bst = handle, watchlist = list(train = dtrain, test = dtest), index = folds[[k]])
+    bst <- bst$bst
+    list(dtrain = dtrain, bst = bst, watchlist = list(train = dtrain, test = dtest), index = folds[[k]])
  })
  rm(dall)
  # a "basket" to collect some results from callbacks
@@ -231,13 +231,13 @@ xgb.cv <- function(params = list(), data, nrounds, nfold, label = NULL, missing

    msg <- lapply(bst_folds, function(fd) {
      xgb.iter.update(
-        booster_handle = fd$bst,
+        bst = fd$bst,
        dtrain = fd$dtrain,
        iter = iteration - 1,
        obj = obj
      )
      xgb.iter.eval(
-        booster_handle = fd$bst,
+        bst = fd$bst,
        watchlist = fd$watchlist,
        iter = iteration - 1,
        feval = feval
@@ -267,7 +267,7 @@ xgb.cv <- function(params = list(), data, nrounds, nfold, label = NULL, missing
  ret <- c(ret, basket)

  class(ret) <- 'xgb.cv.synchronous'
-  invisible(ret)
+  return(invisible(ret))
 }


--- a/R-package/R/xgb.dump.R
+++ b/R-package/R/xgb.dump.R
@@ -56,9 +56,13 @@ xgb.dump <- function(model, fname = NULL, fmap = "", with_stats = FALSE,
  if (!(is.null(fmap) || is.character(fmap)))
    stop("fmap: argument must be a character string (when provided)")

-  model <- xgb.Booster.complete(model)
-  model_dump <- .Call(XGBoosterDumpModel_R, model$handle, NVL(fmap, "")[1], as.integer(with_stats),
-                      as.character(dump_format))
+  model_dump <- .Call(
+    XGBoosterDumpModel_R,
+    xgb.get.handle(model),
+    NVL(fmap, "")[1],
+    as.integer(with_stats),
+    as.character(dump_format)
+  )
  if (dump_format == "dot") {
    return(sapply(model_dump, function(x) gsub("^booster\\[\\d+\\]\\n", "\\1", x)))
  }
--- a/R-package/R/xgb.importance.R
+++ b/R-package/R/xgb.importance.R
@@ -119,21 +119,21 @@ xgb.importance <- function(feature_names = NULL, model = NULL, trees = NULL,
  if (!(is.null(data) && is.null(label) && is.null(target)))
    warning("xgb.importance: parameters 'data', 'label' and 'target' are deprecated")

-  if (!inherits(model, "xgb.Booster"))
-    stop("model: must be an object of class xgb.Booster")
-
-  if (is.null(feature_names) && !is.null(model$feature_names))
-    feature_names <- model$feature_names
+  if (is.null(feature_names)) {
+    model_feature_names <- xgb.feature_names(model)
+    if (NROW(model_feature_names)) {
+      feature_names <- model_feature_names
+    }
+  }

  if (!(is.null(feature_names) || is.character(feature_names)))
    stop("feature_names: Has to be a character vector")

-  model <- xgb.Booster.complete(model)
-  config <- jsonlite::fromJSON(xgb.config(model))
-  if (config$learner$gradient_booster$name == "gblinear") {
+  handle <- xgb.get.handle(model)
+  if (xgb.booster_type(model) == "gblinear") {
    args <- list(importance_type = "weight", feature_names = feature_names)
    results <- .Call(
-      XGBoosterFeatureScore_R, model$handle, jsonlite::toJSON(args, auto_unbox = TRUE, null = "null")
+      XGBoosterFeatureScore_R, handle, jsonlite::toJSON(args, auto_unbox = TRUE, null = "null")
    )
    names(results) <- c("features", "shape", "weight")
    if (length(results$shape) == 2) {
@@ -154,7 +154,7 @@ xgb.importance <- function(feature_names = NULL, model = NULL, trees = NULL,
    for (importance_type in c("weight", "total_gain", "total_cover")) {
      args <- list(importance_type = importance_type, feature_names = feature_names, tree_idx = trees)
      results <- .Call(
-        XGBoosterFeatureScore_R, model$handle, jsonlite::toJSON(args, auto_unbox = TRUE, null = "null")
+        XGBoosterFeatureScore_R, handle, jsonlite::toJSON(args, auto_unbox = TRUE, null = "null")
      )
      names(results) <- c("features", "shape", importance_type)
      concatenated[
--- a/R-package/R/xgb.load.R
+++ b/R-package/R/xgb.load.R
@@ -17,7 +17,7 @@
 #' An object of \code{xgb.Booster} class.
 #'
 #' @seealso
-#' \code{\link{xgb.save}}, \code{\link{xgb.Booster.complete}}.
+#' \code{\link{xgb.save}}
 #'
 #' @examples
 #' data(agaricus.train, package='xgboost')
@@ -46,25 +46,20 @@ xgb.load <- function(modelfile) {
  if (is.null(modelfile))
    stop("xgb.load: modelfile cannot be NULL")

-  handle <- xgb.Booster.handle(
+  bst <- xgb.Booster(
    params = list(),
    cachelist = list(),
-    modelfile = modelfile,
-    handle = NULL
+    modelfile = modelfile
  )
+  bst <- bst$bst
  # re-use modelfile if it is raw so we do not need to serialize
  if (typeof(modelfile) == "raw") {
    warning(
      paste(
        "The support for loading raw booster with `xgb.load` will be ",
-        "discontinued in upcoming release. Use `xgb.load.raw` or",
-        " `xgb.unserialize` instead. "
+        "discontinued in upcoming release. Use `xgb.load.raw` instead. "
      )
    )
-    bst <- xgb.handleToBooster(handle = handle, raw = modelfile)
-  } else {
-    bst <- xgb.handleToBooster(handle = handle, raw = NULL)
  }
-  bst <- xgb.Booster.complete(bst, saveraw = TRUE)
  return(bst)
 }
--- a/R-package/R/xgb.load.raw.R
+++ b/R-package/R/xgb.load.raw.R
@@ -3,21 +3,10 @@
 #' User can generate raw memory buffer by calling xgb.save.raw
 #'
 #' @param buffer the buffer returned by xgb.save.raw
-#' @param as_booster Return the loaded model as xgb.Booster instead of xgb.Booster.handle.
-#'
 #' @export
-xgb.load.raw <- function(buffer, as_booster = FALSE) {
+xgb.load.raw <- function(buffer) {
  cachelist <- list()
-  handle <- .Call(XGBoosterCreate_R, cachelist)
-  .Call(XGBoosterLoadModelFromRaw_R, handle, buffer)
-  class(handle) <- "xgb.Booster.handle"
-
-  if (as_booster) {
-    booster <- list(handle = handle, raw = NULL)
-    class(booster) <- "xgb.Booster"
-    booster <- xgb.Booster.complete(booster, saveraw = TRUE)
-    return(booster)
-  } else {
-    return(handle)
-  }
+  bst <- .Call(XGBoosterCreate_R, cachelist)
+  .Call(XGBoosterLoadModelFromRaw_R, xgb.get.handle(bst), buffer)
+  return(bst)
 }
--- a/R-package/R/xgb.model.dt.tree.R
+++ b/R-package/R/xgb.model.dt.tree.R
@@ -2,8 +2,10 @@
 #'
 #' Parse a boosted tree model text dump into a `data.table` structure.
 #'
-#' @param feature_names Character vector used to overwrite the feature names
-#'        of the model. The default (`NULL`) uses the original feature names.
+#' @param feature_names Character vector of feature names. If the model already
+#'        contains feature names, those will be used when \code{feature_names=NULL} (default value).
+#'
+#'        Note that, if the model already contains feature names, it's \bold{not} possible to override them here.
 #' @param model Object of class `xgb.Booster`.
 #' @param text Character vector previously generated by the function [xgb.dump()]
 #'        (called with parameter `with_stats = TRUE`). `text` takes precedence over `model`.
@@ -54,8 +56,6 @@
 #'   objective = "binary:logistic"
 #' )
 #'
-#' (dt <- xgb.model.dt.tree(colnames(agaricus.train$data), bst))
-#'
 #' # This bst model already has feature_names stored with it, so those would be used when
 #' # feature_names is not set:
 #' (dt <- xgb.model.dt.tree(model = bst))
@@ -79,8 +79,15 @@ xgb.model.dt.tree <- function(feature_names = NULL, model = NULL, text = NULL,
         "  (or NULL if 'model' was provided).")
  }

-  if (is.null(feature_names) && !is.null(model) && !is.null(model$feature_names))
-    feature_names <- model$feature_names
+  model_feature_names <- NULL
+  if (inherits(model, "xgb.Booster")) {
+    model_feature_names <- xgb.feature_names(model)
+    if (NROW(model_feature_names) && !is.null(feature_names)) {
+      stop("'model' contains feature names. Cannot override them.")
+    }
+  }
+  if (is.null(feature_names) && !is.null(model) && !is.null(model_feature_names))
+    feature_names <- model_feature_names

  if (!(is.null(feature_names) || is.character(feature_names))) {
    stop("feature_names: must be a character vector")
@@ -90,8 +97,10 @@ xgb.model.dt.tree <- function(feature_names = NULL, model = NULL, text = NULL,
    stop("trees: must be a vector of integers.")
  }

+  from_text <- TRUE
  if (is.null(text)) {
    text <- xgb.dump(model = model, with_stats = TRUE)
+    from_text <- FALSE
  }

  if (length(text) < 2 || !any(grepl('leaf=(\\d+)', text))) {
@@ -120,8 +129,28 @@ xgb.model.dt.tree <- function(feature_names = NULL, model = NULL, text = NULL,
  td[, isLeaf := grepl("leaf", t, fixed = TRUE)]

  # parse branch lines
-  branch_rx <- paste0("f(\\d+)<(", anynumber_regex, ")\\] yes=(\\d+),no=(\\d+),missing=(\\d+),",
-                      "gain=(", anynumber_regex, "),cover=(", anynumber_regex, ")")
+  branch_rx_nonames <- paste0("f(\\d+)<(", anynumber_regex, ")\\] yes=(\\d+),no=(\\d+),missing=(\\d+),",
+                              "gain=(", anynumber_regex, "),cover=(", anynumber_regex, ")")
+  branch_rx_w_names <- paste0("\\d+:\\[(.+)<(", anynumber_regex, ")\\] yes=(\\d+),no=(\\d+),missing=(\\d+),",
+                              "gain=(", anynumber_regex, "),cover=(", anynumber_regex, ")")
+  text_has_feature_names <- FALSE
+  if (NROW(model_feature_names)) {
+    branch_rx <- branch_rx_w_names
+    text_has_feature_names <- TRUE
+  } else {
+    # Note: when passing a text dump, it might or might not have feature names,
+    # but that aspect is unknown from just the text attributes
+    branch_rx <- branch_rx_nonames
+    if (from_text) {
+      if (sum(grepl(branch_rx_w_names, text)) > sum(grepl(branch_rx_nonames, text))) {
+        branch_rx <- branch_rx_w_names
+        text_has_feature_names <- TRUE
+      }
+    }
+  }
+  if (text_has_feature_names && is.null(model) && !is.null(feature_names)) {
+    stop("'text' contains feature names. Cannot override them.")
+  }
  branch_cols <- c("Feature", "Split", "Yes", "No", "Missing", "Gain", "Cover")
  td[
    isLeaf == FALSE,
@@ -144,10 +173,12 @@ xgb.model.dt.tree <- function(feature_names = NULL, model = NULL, text = NULL,
  is_stump <- function() {
    return(length(td$Feature) == 1 && is.na(td$Feature))
  }
-  if (!is.null(feature_names) && !is_stump()) {
-    if (length(feature_names) <= max(as.numeric(td$Feature), na.rm = TRUE))
-      stop("feature_names has less elements than there are features used in the model")
-    td[isLeaf == FALSE, Feature := feature_names[as.numeric(Feature) + 1]]
+  if (!text_has_feature_names) {
+    if (!is.null(feature_names) && !is_stump()) {
+      if (length(feature_names) <= max(as.numeric(td$Feature), na.rm = TRUE))
+        stop("feature_names has less elements than there are features used in the model")
+      td[isLeaf == FALSE, Feature := feature_names[as.numeric(Feature) + 1]]
+    }
  }

  # parse leaf lines
--- a/R-package/R/xgb.plot.shap.R
+++ b/R-package/R/xgb.plot.shap.R
@@ -303,7 +303,11 @@ xgb.shap.data <- function(data, shap_contrib = NULL, features = NULL, top_n = 1,
  if (is.character(features) && is.null(colnames(data)))
    stop("either provide `data` with column names or provide `features` as column indices")

-  if (is.null(model$feature_names) && model$nfeatures != ncol(data))
+  model_feature_names <- NULL
+  if (is.null(features) && !is.null(model)) {
+    model_feature_names <- xgb.feature_names(model)
+  }
+  if (is.null(model_feature_names) && xgb.num_feature(model) != ncol(data))
    stop("if model has no feature_names, columns in `data` must match features in model")

  if (!is.null(subsample)) {
@@ -332,7 +336,7 @@ xgb.shap.data <- function(data, shap_contrib = NULL, features = NULL, top_n = 1,
  }

  if (is.null(features)) {
-    if (!is.null(model$feature_names)) {
+    if (!is.null(model_feature_names)) {
      imp <- xgb.importance(model = model, trees = trees)
    } else {
      imp <- xgb.importance(model = model, trees = trees, feature_names = colnames(data))
--- a/R-package/R/xgb.save.R
+++ b/R-package/R/xgb.save.R
@@ -1,12 +1,24 @@
 #' Save xgboost model to binary file
 #'
-#' Save xgboost model to a file in binary format.
+#' Save xgboost model to a file in binary or JSON format.
 #'
-#' @param model model object of \code{xgb.Booster} class.
-#' @param fname name of the file to write.
+#' @param model Model object of \code{xgb.Booster} class.
+#' @param fname Name of the file to write.
+#'
+#' Note that the extension of this file name determined the serialization format to use:\itemize{
+#' \item Extension ".ubj" will use the universal binary JSON format (recommended).
+#' This format uses binary types for e.g. floating point numbers, thereby preventing any loss
+#' of precision when converting to a human-readable JSON text or similar.
+#' \item Extension ".json" will use plain JSON, which is a human-readable format.
+#' \item Extension ".deprecated" will use a \bold{deprecated} binary format. This format will
+#' not be able to save attributes introduced after v1 of XGBoost, such as the "best_iteration"
+#' attribute that boosters might keep, nor feature names or user-specifiec attributes.
+#' \item If the format is not specified by passing one of the file extensions above, will
+#' default to UBJ.
+#' }
 #'
 #' @details
-#' This methods allows to save a model in an xgboost-internal binary format which is universal
+#' This methods allows to save a model in an xgboost-internal binary or text format which is universal
 #' among the various xgboost interfaces. In R, the saved model file could be read-in later
 #' using either the \code{\link{xgb.load}} function or the \code{xgb_model} parameter
 #' of \code{\link{xgb.train}}.
@@ -14,13 +26,13 @@
 #' Note: a model can also be saved as an R-object (e.g., by using \code{\link[base]{readRDS}}
 #' or \code{\link[base]{save}}). However, it would then only be compatible with R, and
 #' corresponding R-methods would need to be used to load it. Moreover, persisting the model with
-#' \code{\link[base]{readRDS}} or \code{\link[base]{save}}) will cause compatibility problems in
+#' \code{\link[base]{readRDS}} or \code{\link[base]{save}}) might cause compatibility problems in
 #' future versions of XGBoost. Consult \code{\link{a-compatibility-note-for-saveRDS-save}} to learn
 #' how to persist models in a future-proof way, i.e. to make the model accessible in future
 #' releases of XGBoost.
 #'
 #' @seealso
-#' \code{\link{xgb.load}}, \code{\link{xgb.Booster.complete}}.
+#' \code{\link{xgb.load}}
 #'
 #' @examples
 #' data(agaricus.train, package='xgboost')
@@ -51,8 +63,7 @@ xgb.save <- function(model, fname) {
    stop("model must be xgb.Booster.",
         if (inherits(model, "xgb.DMatrix")) " Use xgb.DMatrix.save to save an xgb.DMatrix object." else "")
  }
-  model <- xgb.Booster.complete(model, saveraw = FALSE)
  fname <- path.expand(fname)
-  .Call(XGBoosterSaveModel_R, model$handle, enc2utf8(fname[1]))
+  .Call(XGBoosterSaveModel_R, xgb.get.handle(model), enc2utf8(fname[1]))
  return(TRUE)
 }
--- a/R-package/R/xgb.save.raw.R
+++ b/R-package/R/xgb.save.raw.R
@@ -11,8 +11,6 @@
 #'     \item \code{deprecated}: Encode the booster into old customized binary format.
 #' }
 #'
-#' Right now the default is \code{deprecated} but will be changed to \code{ubj} in upcoming release.
-#'
 #' @examples
 #' data(agaricus.train, package='xgboost')
 #' data(agaricus.test, package='xgboost')
@@ -30,7 +28,7 @@
 #' bst <- xgb.load.raw(raw)
 #'
 #' @export
-xgb.save.raw <- function(model, raw_format = "deprecated") {
+xgb.save.raw <- function(model, raw_format = "ubj") {
  handle <- xgb.get.handle(model)
  args <- list(format = raw_format)
  .Call(XGBoosterSaveModelToRaw_R, handle, jsonlite::toJSON(args, auto_unbox = TRUE))
--- a/R-package/R/xgb.serialize.R
+++ b/R-package/R/xgb.serialize.R
@@ -1,21 +0,0 @@
-#' Serialize the booster instance into R's raw vector.  The serialization method differs
-#' from \code{\link{xgb.save.raw}} as the latter one saves only the model but not
-#' parameters.  This serialization format is not stable across different xgboost versions.
-#'
-#' @param booster the booster instance
-#'
-#' @examples
-#' data(agaricus.train, package='xgboost')
-#' data(agaricus.test, package='xgboost')
-#' train <- agaricus.train
-#' test <- agaricus.test
-#' bst <- xgb.train(data = xgb.DMatrix(train$data, label = train$label), max_depth = 2,
-#'                  eta = 1, nthread = 2, nrounds = 2,objective = "binary:logistic")
-#' raw <- xgb.serialize(bst)
-#' bst <- xgb.unserialize(raw)
-#'
-#' @export
-xgb.serialize <- function(booster) {
-  handle <- xgb.get.handle(booster)
-  .Call(XGBoosterSerializeToBuffer_R, handle)
-}
--- a/R-package/R/xgb.train.R
+++ b/R-package/R/xgb.train.R
@@ -152,6 +152,10 @@
 #'        See \code{\link{callbacks}}. Some of the callbacks are automatically created depending on the
 #'        parameters' values. User can provide either existing or their own callback methods in order
 #'        to customize the training process.
+#'
+#'        Note that some callbacks might try to set an evaluation log - be aware that these evaluation logs
+#'        are kept as R attributes, and thus do not get saved when using non-R serializaters like
+#'        \link{xgb.save} (but are kept when using R serializers like \link{saveRDS}).
 #' @param ... other parameters to pass to \code{params}.
 #' @param label vector of response values. Should not be provided when data is
 #'        a local data file name or an \code{xgb.DMatrix}.
@@ -160,6 +164,9 @@
 #'        This parameter is only used when input is a dense matrix.
 #' @param weight a vector indicating the weight for each row of the input.
 #'
+#' @return
+#' An object of class \code{xgb.Booster}.
+#'
 #' @details
 #' These are the training functions for \code{xgboost}.
 #'
@@ -201,28 +208,20 @@
 #'   \item \code{cb.save.model}: when \code{save_period > 0} is set.
 #' }
 #'
-#' @return
-#' An object of class \code{xgb.Booster} with the following elements:
-#' \itemize{
-#'   \item \code{handle} a handle (pointer) to the xgboost model in memory.
-#'   \item \code{raw} a cached memory dump of the xgboost model saved as R's \code{raw} type.
-#'   \item \code{niter} number of boosting iterations.
-#'   \item \code{evaluation_log} evaluation history stored as a \code{data.table} with the
-#'         first column corresponding to iteration number and the rest corresponding to evaluation
-#'         metrics' values. It is created by the \code{\link{cb.evaluation.log}} callback.
-#'   \item \code{call} a function call.
-#'   \item \code{params} parameters that were passed to the xgboost library. Note that it does not
-#'         capture parameters changed by the \code{\link{cb.reset.parameters}} callback.
-#'   \item \code{callbacks} callback functions that were either automatically assigned or
-#'         explicitly passed.
-#'   \item \code{best_iteration} iteration number with the best evaluation metric value
-#'         (only available with early stopping).
-#'   \item \code{best_score} the best evaluation metric value during early stopping.
-#'         (only available with early stopping).
-#'   \item \code{feature_names} names of the training dataset features
-#'         (only when column names were defined in training data).
-#'   \item \code{nfeatures} number of features in training data.
-#' }
+#' Note that objects of type `xgb.Booster` as returned by this function behave a bit differently
+#' from typical R objects (it's an 'altrep' list class), and it makes a separation between
+#' internal booster attributes (restricted to jsonifyable data), accessed through \link{xgb.attr}
+#' and shared between interfaces through serialization functions like \link{xgb.save}; and
+#' R-specific attributes, accessed through \link{attributes} and \link{attr}, which are otherwise
+#' only used in the R interface, only kept when using R's serializers like \link{saveRDS}, and
+#' not anyhow used by functions like \link{predict.xgb.Booster}.
+#'
+#' Be aware that one such R attribute that is automatically added is `params` - this attribute
+#' is assigned from the `params` argument to this function, and is only meant to serve as a
+#' reference for what went into the booster, but is not used in other methods that take a booster
+#' object - so for example, changing the booster's configuration requires calling `xgb.config<-`
+#' or 'xgb.parameters<-', while simply modifying `attributes(model)$params$<...>` will have no
+#' effect elsewhere.
 #'
 #' @seealso
 #' \code{\link{callbacks}},
@@ -371,27 +370,31 @@ xgb.train <- function(params = list(), data, nrounds, watchlist = list(),
  # The tree updating process would need slightly different handling
  is_update <- NVL(params[['process_type']], '.') == 'update'

+  past_evaluation_log <- NULL
+  if (inherits(xgb_model, "xgb.Booster")) {
+    past_evaluation_log <- attributes(xgb_model)$evaluation_log
+  }
+
  # Construct a booster (either a new one or load from xgb_model)
-  handle <- xgb.Booster.handle(
+  bst <- xgb.Booster(
    params = params,
    cachelist = append(watchlist, dtrain),
-    modelfile = xgb_model,
-    handle = NULL
+    modelfile = xgb_model
+  )
+  niter_init <- bst$niter
+  bst <- bst$bst
+  .Call(
+    XGBoosterCopyInfoFromDMatrix_R,
+    xgb.get.handle(bst),
+    dtrain
  )
-  bst <- xgb.handleToBooster(handle = handle, raw = NULL)

  # extract parameters that can affect the relationship b/w #trees and #iterations
-  num_class <- max(as.numeric(NVL(params[['num_class']], 1)), 1)
-  num_parallel_tree <- max(as.numeric(NVL(params[['num_parallel_tree']], 1)), 1)
+  # Note: it might look like these aren't used, but they need to be defined in this
+  # environment for the callbacks for work correctly.
+  num_class <- max(as.numeric(NVL(params[['num_class']], 1)), 1) # nolint
+  num_parallel_tree <- max(as.numeric(NVL(params[['num_parallel_tree']], 1)), 1) # nolint

-  # When the 'xgb_model' was set, find out how many boosting iterations it has
-  niter_init <- 0
-  if (!is.null(xgb_model)) {
-    niter_init <- as.numeric(xgb.attr(bst, 'niter')) + 1
-    if (length(niter_init) == 0) {
-      niter_init <- xgb.ntree(bst) %/% (num_parallel_tree * num_class)
-    }
-  }
  if (is_update && nrounds > niter_init)
    stop("nrounds cannot be larger than ", niter_init, " (nrounds of xgb_model)")

@@ -405,7 +408,7 @@ xgb.train <- function(params = list(), data, nrounds, watchlist = list(),
    for (f in cb$pre_iter) f()

    xgb.iter.update(
-        booster_handle = bst$handle,
+        bst = bst,
        dtrain = dtrain,
        iter = iteration - 1,
        obj = obj
@@ -413,46 +416,43 @@ xgb.train <- function(params = list(), data, nrounds, watchlist = list(),

    if (length(watchlist) > 0) {
      bst_evaluation <- xgb.iter.eval(  # nolint: object_usage_linter
-        booster_handle = bst$handle,
+        bst = bst,
        watchlist = watchlist,
        iter = iteration - 1,
        feval = feval
      )
    }

-    xgb.attr(bst$handle, 'niter') <- iteration - 1
-
    for (f in cb$post_iter) f()

    if (stop_condition) break
  }
  for (f in cb$finalize) f(finalize = TRUE)

-  bst <- xgb.Booster.complete(bst, saveraw = TRUE)
-
-  # store the total number of boosting iterations
-  bst$niter <- end_iteration
-
  # store the evaluation results
-  if (length(evaluation_log) > 0 &&
-      nrow(evaluation_log) > 0) {
+  keep_evaluation_log <- FALSE
+  if (length(evaluation_log) > 0 && nrow(evaluation_log) > 0) {
+    keep_evaluation_log <- TRUE
    # include the previous compatible history when available
    if (inherits(xgb_model, 'xgb.Booster') &&
        !is_update &&
-        !is.null(xgb_model$evaluation_log) &&
+        !is.null(past_evaluation_log) &&
        isTRUE(all.equal(colnames(evaluation_log),
-                         colnames(xgb_model$evaluation_log)))) {
-      evaluation_log <- rbindlist(list(xgb_model$evaluation_log, evaluation_log))
+                         colnames(past_evaluation_log)))) {
+      evaluation_log <- rbindlist(list(past_evaluation_log, evaluation_log))
    }
-    bst$evaluation_log <- evaluation_log
  }

-  bst$call <- match.call()
-  bst$params <- params
-  bst$callbacks <- callbacks
-  if (!is.null(colnames(dtrain)))
-    bst$feature_names <- colnames(dtrain)
-  bst$nfeatures <- ncol(dtrain)
+  extra_attrs <- list(
+    call = match.call(),
+    params = params,
+    callbacks = callbacks
+  )
+  if (keep_evaluation_log) {
+    extra_attrs$evaluation_log <- evaluation_log
+  }
+  curr_attrs <- attributes(bst)
+  attributes(bst) <- c(curr_attrs, extra_attrs)

  return(bst)
 }
--- a/R-package/R/xgb.unserialize.R
+++ b/R-package/R/xgb.unserialize.R
@@ -1,41 +0,0 @@
-#' Load the instance back from \code{\link{xgb.serialize}}
-#'
-#' @param buffer the buffer containing booster instance saved by \code{\link{xgb.serialize}}
-#' @param handle An \code{xgb.Booster.handle} object which will be overwritten with
-#' the new deserialized object. Must be a null handle (e.g. when loading the model through
-#' `readRDS`). If not provided, a new handle will be created.
-#' @return An \code{xgb.Booster.handle} object.
-#'
-#' @export
-xgb.unserialize <- function(buffer, handle = NULL) {
-  cachelist <- list()
-  if (is.null(handle)) {
-    handle <- .Call(XGBoosterCreate_R, cachelist)
-  } else {
-    if (!is.null.handle(handle))
-      stop("'handle' is not null/empty. Cannot overwrite existing handle.")
-    .Call(XGBoosterCreateInEmptyObj_R, cachelist, handle)
-  }
-  tryCatch(
-    .Call(XGBoosterUnserializeFromBuffer_R, handle, buffer),
-    error = function(e) {
-      error_msg <- conditionMessage(e)
-      m <- regexec("(src[\\\\/]learner.cc:[0-9]+): Check failed: (header == serialisation_header_)",
-                   error_msg, perl = TRUE)
-      groups <- regmatches(error_msg, m)[[1]]
-      if (length(groups) == 3) {
-        warning(paste("The model had been generated by XGBoost version 1.0.0 or earlier and was ",
-                      "loaded from a RDS file. We strongly ADVISE AGAINST using saveRDS() ",
-                      "function, to ensure that your model can be read in current and upcoming ",
-                      "XGBoost releases. Please use xgb.save() instead to preserve models for the ",
-                      "long term. For more details and explanation, see ",
-                      "https://xgboost.readthedocs.io/en/latest/tutorials/saving_model.html",
-                      sep = ""))
-        .Call(XGBoosterLoadModelFromRaw_R, handle, buffer)
-      } else {
-        stop(e)
-      }
-    })
-  class(handle) <- "xgb.Booster.handle"
-  return(handle)
-}
--- a/R-package/R/xgboost.R
+++ b/R-package/R/xgboost.R
@@ -100,8 +100,10 @@ NULL
 #' @importFrom jsonlite toJSON
 #' @importFrom methods new
 #' @importFrom utils object.size str tail
+#' @importFrom stats coef
 #' @importFrom stats predict
 #' @importFrom stats median
+#' @importFrom stats variable.names
 #' @importFrom utils head
 #' @importFrom graphics barplot
 #' @importFrom graphics lines