R accessors for model attributes

This commit is contained in:
Vadim Khotilovich 2016-05-02 00:20:44 -05:00
parent 0839aed380
commit 79c7c9e5bb
4 changed files with 140 additions and 0 deletions

View File

@ -9,12 +9,14 @@ S3method(predict,xgb.Booster)
S3method(predict,xgb.Booster.handle)
S3method(setinfo,xgb.DMatrix)
S3method(slice,xgb.DMatrix)
export("xgb.attr<-")
export(getinfo)
export(print.xgb.DMatrix)
export(setinfo)
export(slice)
export(xgb.DMatrix)
export(xgb.DMatrix.save)
export(xgb.attr)
export(xgb.create.features)
export(xgb.cv)
export(xgb.dump)

View File

@ -142,3 +142,77 @@ predict.xgb.Booster.handle <- function(object, ...) {
ret <- predict(bst, ...)
return(ret)
}
#' Accessors for serializable attributes of a model.
#'
#' These methods allow to manipulate key-value attribute strings of an xgboost model.
#'
#' @param object Object of class \code{xgb.Booster} or \code{xgb.Booster.handle}.
#' @param which a non-empty character string specifying which attribute is to be accessed.
#' @param value a value of an attribute. Non-character values are converted to character.
#' When length of a \code{value} vector is more than one, only the first element is used.
#'
#' @details
#' Note that the xgboost model attributes are a separate concept from the attributes in R.
#' Specifically, they refer to key-value strings that can be attached to an xgboost model
#' and stored within the model's binary representation.
#' In contrast, any R-attribute assigned to an R-object of \code{xgb.Booster} class
#' would not be saved by \code{xgb.save}, since xgboost model is an external memory object
#' and its serialization is handled extrnally.
#'
#' Also note that the attribute setter would usually work more efficiently for \code{xgb.Booster.handle}
#' than for \code{xgb.Booster}, since only just a handle would need to be copied.
#'
#' @return
#' \code{xgb.attr} returns either a string value of an attribute
#' or \code{NULL} if an attribute wasn't stored in a model.
#'
#' @examples
#' data(agaricus.train, package='xgboost')
#' train <- agaricus.train
#'
#' bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
#' eta = 1, nthread = 2, nround = 2, objective = "binary:logistic")
#'
#' xgb.attr(bst, "my_attribute") <- "my attribute value"
#' print(xgb.attr(bst, "my_attribute"))
#'
#' xgb.save(bst, 'xgb.model')
#' bst1 <- xgb.load('xgb.model')
#' print(xgb.attr(bst1, "my_attribute"))
#'
#' @rdname xgb.attr
#' @export
xgb.attr <- function(object, which) {
if (is.null(which) | nchar(as.character(which)[1]) == 0) stop("invalid attribute name")
handle = xgb.get.handle(object, "xgb.attr")
.Call("XGBoosterGetAttr_R", handle, as.character(which)[1], PACKAGE="xgboost")
}
#' @rdname xgb.attr
#' @export
`xgb.attr<-` <- function(object, which, value) {
if (is.null(which) | nchar(as.character(which)[1]) == 0) stop("invalid attribute name")
handle = xgb.get.handle(object, "xgb.attr")
# TODO: setting NULL value to remove an attribute
.Call("XGBoosterSetAttr_R", handle, as.character(which)[1], as.character(value)[1], PACKAGE="xgboost")
if (is(object, 'xgb.Booster') & !is.null(object$raw)) {
object$raw <- xgb.save.raw(object$handle)
}
object
}
# Return a valid handle out of either xgb.Booster.handle or xgb.Booster
# internal utility function
xgb.get.handle <- function(object, caller="") {
handle = switch(class(object),
xgb.Booster = object$handle,
xgb.Booster.handle = object,
stop(caller, ": argument must be either xgb.Booster or xgb.Booster.handle")
)
if (is.null(handle) | .Call("XGCheckNullPtr_R", handle, PACKAGE="xgboost")) {
stop(caller, ": invalid xgb.Booster.handle")
}
handle
}

53
R-package/man/xgb.attr.Rd Normal file
View File

@ -0,0 +1,53 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/xgb.Booster.R
\name{xgb.attr}
\alias{xgb.attr}
\alias{xgb.attr<-}
\title{Accessors for serializable attributes of a model.}
\usage{
xgb.attr(object, which)
xgb.attr(object, which) <- value
}
\arguments{
\item{object}{Object of class \code{xgb.Booster} or \code{xgb.Booster.handle}.}
\item{which}{a non-empty character string specifying which attribute is to be accessed.}
\item{value}{a value of an attribute. Non-character values are converted to character.
When length of a \code{value} vector is more than one, only the first element is used.}
}
\value{
\code{xgb.attr} returns either a string value of an attribute
or \code{NULL} if an attribute wasn't stored in a model.
}
\description{
These methods allow to manipulate key-value attribute strings of an xgboost model.
}
\details{
Note that the xgboost model attributes are a separate concept from the attributes in R.
Specifically, they refer to key-value strings that can be attached to an xgboost model
and stored within the model's binary representation.
In contrast, any R-attribute assigned to an R-object of \code{xgb.Booster} class
would not be saved by \code{xgb.save}, since xgboost model is an external memory object
and its serialization is handled extrnally.
Also note that the attribute setter would usually work more efficiently for \code{xgb.Booster.handle}
than for \code{xgb.Booster}, since only just a handle would need to be copied.
}
\examples{
data(agaricus.train, package='xgboost')
train <- agaricus.train
bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
eta = 1, nthread = 2, nround = 2, objective = "binary:logistic")
xgb.attr(bst, "my_attribute") <- "my attribute value"
print(xgb.attr(bst, "my_attribute"))
xgb.save(bst, 'xgb.model')
bst1 <- xgb.load('xgb.model')
print(xgb.attr(bst1, "my_attribute"))
}

View File

@ -28,6 +28,17 @@ test_that("xgb.dump works", {
expect_true(xgb.dump(bst.Tree, 'xgb.model.dump', with.stats = T))
})
test_that("xgb.attr", {
val <- "my attribute value"
expect_error(xgb.attr(bst.Tree, NULL))
expect_error(xgb.attr(val, val))
xgb.attr(bst.Tree, "my_attr") <- val
expect_equal(xgb.attr(bst.Tree, "my_attr"), val)
xgb.save(bst.Tree, 'xgb.model')
bst1 <- xgb.load('xgb.model')
expect_equal(xgb.attr(bst1, "my_attr"), val)
})
test_that("xgb.model.dt.tree works with and without feature names", {
names.dt.trees <- c("ID", "Feature", "Split", "Yes", "No", "Missing", "Quality", "Cover",
"Tree", "Yes.Feature", "Yes.Cover", "Yes.Quality", "No.Feature", "No.Cover", "No.Quality")