Merge pull request #1198 from khotilov/xgb_attributes

More functionality for model attributes
This commit is contained in:
Tianqi Chen 2016-05-16 09:31:58 -07:00
commit 49bbd72d08
12 changed files with 360 additions and 73 deletions

View File

@ -10,6 +10,8 @@ S3method(predict,xgb.Booster.handle)
S3method(setinfo,xgb.DMatrix)
S3method(slice,xgb.DMatrix)
export("xgb.attr<-")
export("xgb.attributes<-")
export("xgb.parameters<-")
export(getinfo)
export(print.xgb.DMatrix)
export(setinfo)
@ -17,6 +19,7 @@ export(slice)
export(xgb.DMatrix)
export(xgb.DMatrix.save)
export(xgb.attr)
export(xgb.attributes)
export(xgb.create.features)
export(xgb.cv)
export(xgb.dump)

View File

@ -2,31 +2,28 @@
# internal utility function
xgb.Booster <- function(params = list(), cachelist = list(), modelfile = NULL) {
if (typeof(cachelist) != "list") {
stop("xgb.Booster: only accepts list of DMatrix as cachelist")
stop("xgb.Booster only accepts list of DMatrix as cachelist")
}
for (dm in cachelist) {
if (class(dm) != "xgb.DMatrix") {
stop("xgb.Booster: only accepts list of DMatrix as cachelist")
stop("xgb.Booster only accepts list of DMatrix as cachelist")
}
}
handle <- .Call("XGBoosterCreate_R", cachelist, PACKAGE = "xgboost")
if (length(params) != 0) {
for (i in 1:length(params)) {
p <- params[i]
.Call("XGBoosterSetParam_R", handle, gsub("\\.", "_", names(p)), as.character(p),
PACKAGE = "xgboost")
}
}
if (!is.null(modelfile)) {
if (typeof(modelfile) == "character") {
.Call("XGBoosterLoadModel_R", handle, modelfile, PACKAGE = "xgboost")
} else if (typeof(modelfile) == "raw") {
.Call("XGBoosterLoadModelFromRaw_R", handle, modelfile, PACKAGE = "xgboost")
} else {
stop("xgb.Booster: modelfile must be character or raw vector")
stop("modelfile must be character or raw vector")
}
}
return(structure(handle, class = "xgb.Booster.handle"))
class(handle) <- "xgb.Booster.handle"
if (length(params) > 0) {
xgb.parameters(handle) <- params
}
return(handle)
}
# Convert xgb.Booster.handle to xgb.Booster
@ -38,6 +35,20 @@ xgb.handleToBooster <- function(handle, raw = NULL)
return(bst)
}
# Return a verified to be valid handle out of either xgb.Booster.handle or xgb.Booster
# internal utility function
xgb.get.handle <- function(object) {
handle <- switch(class(object)[1],
xgb.Booster = object$handle,
xgb.Booster.handle = object,
stop("argument must be of either xgb.Booster or xgb.Booster.handle class")
)
if (is.null(handle) | .Call("XGCheckNullPtr_R", handle, PACKAGE="xgboost")) {
stop("invalid xgb.Booster.handle")
}
handle
}
# Check whether an xgb.Booster object is complete
# internal utility function
xgb.Booster.check <- function(bst, saveraw = TRUE)
@ -146,28 +157,43 @@ predict.xgb.Booster.handle <- function(object, ...) {
#' Accessors for serializable attributes of a model.
#'
#' These methods allow to manipulate key-value attribute strings of an xgboost model.
#' These methods allow to manipulate the key-value attribute strings of an xgboost model.
#'
#' @param object Object of class \code{xgb.Booster} or \code{xgb.Booster.handle}.
#' @param which a non-empty character string specifying which attribute is to be accessed.
#' @param value a value of an attribute. Non-character values are converted to character.
#' When length of a \code{value} vector is more than one, only the first element is used.
#' @param object Object of class \code{xgb.Booster} or \code{xgb.Booster.handle}.
#' @param name a non-empty character string specifying which attribute is to be accessed.
#' @param value a value of an attribute for \code{xgb.attr<-}; for \code{xgb.attributes<-}
#' it's a list (or an object coercible to a list) with the names of attributes to set
#' and the elements corresponding to attribute values.
#' Non-character values are converted to character.
#' When attribute value is not a scalar, only the first index is used.
#' Use \code{NULL} to remove an attribute.
#'
#' @details
#' Note that the xgboost model attributes are a separate concept from the attributes in R.
#' Specifically, they refer to key-value strings that can be attached to an xgboost model
#' and stored within the model's binary representation.
#' The primary purpose of xgboost model attributes is to store some meta-data about the model.
#' Note that they are a separate concept from the object attributes in R.
#' Specifically, they refer to key-value strings that can be attached to an xgboost model,
#' stored together with the model's binary representation, and accessed later
#' (from R or any other interface).
#' In contrast, any R-attribute assigned to an R-object of \code{xgb.Booster} class
#' would not be saved by \code{xgb.save}, since xgboost model is an external memory object
#' would not be saved by \code{xgb.save} because an xgboost model is an external memory object
#' and its serialization is handled extrnally.
#' Also, setting an attribute that has the same name as one of xgboost's parameters wouldn't
#' change the value of that parameter for a model.
#' Use \code{\link{`xgb.parameters<-`}} to set or change model parameters.
#'
#' Also note that the attribute setter would usually work more efficiently for \code{xgb.Booster.handle}
#' than for \code{xgb.Booster}, since only just a handle would need to be copied.
#' The attribute setters would usually work more efficiently for \code{xgb.Booster.handle}
#' than for \code{xgb.Booster}, since only just a handle (pointer) would need to be copied.
#'
#' The \code{xgb.attributes<-} setter either updates the existing or adds one or several attributes,
#' but doesn't delete the existing attributes which don't have their names in \code{names(attributes)}.
#'
#' @return
#' \code{xgb.attr} returns either a string value of an attribute
#' or \code{NULL} if an attribute wasn't stored in a model.
#'
#' \code{xgb.attributes} returns a list of all attribute stored in a model
#' or \code{NULL} if a model has no stored attributes.
#'
#' @examples
#' data(agaricus.train, package='xgboost')
#' train <- agaricus.train
@ -177,42 +203,117 @@ predict.xgb.Booster.handle <- function(object, ...) {
#'
#' xgb.attr(bst, "my_attribute") <- "my attribute value"
#' print(xgb.attr(bst, "my_attribute"))
#' xgb.attributes(bst) <- list(a = 123, b = "abc")
#'
#' xgb.save(bst, 'xgb.model')
#' bst1 <- xgb.load('xgb.model')
#' print(xgb.attr(bst1, "my_attribute"))
#'
#' print(xgb.attributes(bst1))
#'
#' # deletion:
#' xgb.attr(bst1, "my_attribute") <- NULL
#' print(xgb.attributes(bst1))
#' xgb.attributes(bst1) <- list(a = NULL, b = NULL)
#' print(xgb.attributes(bst1))
#'
#' @rdname xgb.attr
#' @export
xgb.attr <- function(object, which) {
if (is.null(which) | nchar(as.character(which)[1]) == 0) stop("invalid attribute name")
handle = xgb.get.handle(object, "xgb.attr")
.Call("XGBoosterGetAttr_R", handle, as.character(which)[1], PACKAGE="xgboost")
xgb.attr <- function(object, name) {
if (is.null(name) || nchar(as.character(name[1])) == 0) stop("invalid attribute name")
handle <- xgb.get.handle(object)
.Call("XGBoosterGetAttr_R", handle, as.character(name[1]), PACKAGE="xgboost")
}
#' @rdname xgb.attr
#' @export
`xgb.attr<-` <- function(object, which, value) {
if (is.null(which) | nchar(as.character(which)[1]) == 0) stop("invalid attribute name")
handle = xgb.get.handle(object, "xgb.attr")
# TODO: setting NULL value to remove an attribute
.Call("XGBoosterSetAttr_R", handle, as.character(which)[1], as.character(value)[1], PACKAGE="xgboost")
`xgb.attr<-` <- function(object, name, value) {
if (is.null(name) || nchar(as.character(name[1])) == 0) stop("invalid attribute name")
handle <- xgb.get.handle(object)
if (!is.null(value)) {
# Coerce the elements to be scalar strings.
# Q: should we warn user about non-scalar elements?
value <- as.character(value[1])
}
.Call("XGBoosterSetAttr_R", handle, as.character(name[1]), value, PACKAGE="xgboost")
if (is(object, 'xgb.Booster') && !is.null(object$raw)) {
object$raw <- xgb.save.raw(object$handle)
object$raw <- xgb.save.raw(object$handle)
}
object
}
# Return a valid handle out of either xgb.Booster.handle or xgb.Booster
# internal utility function
xgb.get.handle <- function(object, caller="") {
handle = switch(class(object),
xgb.Booster = object$handle,
xgb.Booster.handle = object,
stop(caller, ": argument must be either xgb.Booster or xgb.Booster.handle")
)
if (is.null(handle) | .Call("XGCheckNullPtr_R", handle, PACKAGE="xgboost")) {
stop(caller, ": invalid xgb.Booster.handle")
}
handle
#' @rdname xgb.attr
#' @export
xgb.attributes <- function(object) {
handle <- xgb.get.handle(object)
attr_names <- .Call("XGBoosterGetAttrNames_R", handle, PACKAGE="xgboost")
if (is.null(attr_names)) return(NULL)
res <- lapply(attr_names, function(x) {
.Call("XGBoosterGetAttr_R", handle, x, PACKAGE="xgboost")
})
names(res) <- attr_names
res
}
#' @rdname xgb.attr
#' @export
`xgb.attributes<-` <- function(object, value) {
a <- as.list(value)
if (is.null(names(a)) || any(nchar(names(a)) == 0)) {
stop("attribute names cannot be empty strings")
}
# Coerce the elements to be scalar strings.
# Q: should we warn a user about non-scalar elements?
a <- lapply(a, function(x) {
if (is.null(x)) return(NULL)
as.character(x[1])
})
handle <- xgb.get.handle(object)
for (i in seq_along(a)) {
.Call("XGBoosterSetAttr_R", handle, names(a[i]), a[[i]], PACKAGE="xgboost")
}
if (is(object, 'xgb.Booster') && !is.null(object$raw)) {
object$raw <- xgb.save.raw(object$handle)
}
object
}
#' Accessors for model parameters.
#'
#' Only the setter for xgboost parameters is currently implemented.
#'
#' @param object Object of class \code{xgb.Booster} or \code{xgb.Booster.handle}.
#' @param value a list (or an object coercible to a list) with the names of parameters to set
#' and the elements corresponding to parameter values.
#'
#' @details
#' Note that the setter would usually work more efficiently for \code{xgb.Booster.handle}
#' than for \code{xgb.Booster}, since only just a handle would need to be copied.
#'
#' @examples
#' data(agaricus.train, package='xgboost')
#' train <- agaricus.train
#'
#' bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
#' eta = 1, nthread = 2, nround = 2, objective = "binary:logistic")
#'
#' xgb.parameters(bst) <- list(eta = 0.1)
#'
#' @rdname xgb.parameters
#' @export
`xgb.parameters<-` <- function(object, value) {
if (length(value) == 0) return(object)
p <- as.list(value)
if (is.null(names(p)) || any(nchar(names(p)) == 0)) {
stop("parameter names cannot be empty strings")
}
names(p) <- gsub("\\.", "_", names(p))
p <- lapply(p, function(x) as.character(x)[1])
handle <- xgb.get.handle(object)
for (i in seq_along(p)) {
.Call("XGBoosterSetParam_R", handle, names(p[i]), p[[i]], PACKAGE = "xgboost")
}
if (is(object, 'xgb.Booster') && !is.null(object$raw)) {
object$raw <- xgb.save.raw(object$handle)
}
object
}

View File

@ -3,37 +3,58 @@
\name{xgb.attr}
\alias{xgb.attr}
\alias{xgb.attr<-}
\alias{xgb.attributes}
\alias{xgb.attributes<-}
\title{Accessors for serializable attributes of a model.}
\usage{
xgb.attr(object, which)
xgb.attr(object, name)
xgb.attr(object, which) <- value
xgb.attr(object, name) <- value
xgb.attributes(object)
xgb.attributes(object) <- value
}
\arguments{
\item{object}{Object of class \code{xgb.Booster} or \code{xgb.Booster.handle}.}
\item{object}{Object of class \code{xgb.Booster} or \code{xgb.Booster.handle}.}
\item{which}{a non-empty character string specifying which attribute is to be accessed.}
\item{name}{a non-empty character string specifying which attribute is to be accessed.}
\item{value}{a value of an attribute. Non-character values are converted to character.
When length of a \code{value} vector is more than one, only the first element is used.}
\item{value}{a value of an attribute for \code{xgb.attr<-}; for \code{xgb.attributes<-}
it's a list (or an object coercible to a list) with the names of attributes to set
and the elements corresponding to attribute values.
Non-character values are converted to character.
When attribute value is not a scalar, only the first index is used.
Use \code{NULL} to remove an attribute.}
}
\value{
\code{xgb.attr} returns either a string value of an attribute
or \code{NULL} if an attribute wasn't stored in a model.
\code{xgb.attributes} returns a list of all attribute stored in a model
or \code{NULL} if a model has no stored attributes.
}
\description{
These methods allow to manipulate key-value attribute strings of an xgboost model.
These methods allow to manipulate the key-value attribute strings of an xgboost model.
}
\details{
Note that the xgboost model attributes are a separate concept from the attributes in R.
Specifically, they refer to key-value strings that can be attached to an xgboost model
and stored within the model's binary representation.
The primary purpose of xgboost model attributes is to store some meta-data about the model.
Note that they are a separate concept from the object attributes in R.
Specifically, they refer to key-value strings that can be attached to an xgboost model,
stored together with the model's binary representation, and accessed later
(from R or any other interface).
In contrast, any R-attribute assigned to an R-object of \code{xgb.Booster} class
would not be saved by \code{xgb.save}, since xgboost model is an external memory object
would not be saved by \code{xgb.save} because an xgboost model is an external memory object
and its serialization is handled extrnally.
Also, setting an attribute that has the same name as one of xgboost's parameters wouldn't
change the value of that parameter for a model.
Use \code{\link{`xgb.parameters<-`}} to set or change model parameters.
Also note that the attribute setter would usually work more efficiently for \code{xgb.Booster.handle}
than for \code{xgb.Booster}, since only just a handle would need to be copied.
The attribute setters would usually work more efficiently for \code{xgb.Booster.handle}
than for \code{xgb.Booster}, since only just a handle (pointer) would need to be copied.
The \code{xgb.attributes<-} setter either updates the existing or adds one or several attributes,
but doesn't delete the existing attributes which don't have their names in \code{names(attributes)}.
}
\examples{
data(agaricus.train, package='xgboost')
@ -44,10 +65,18 @@ bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
xgb.attr(bst, "my_attribute") <- "my attribute value"
print(xgb.attr(bst, "my_attribute"))
xgb.attributes(bst) <- list(a = 123, b = "abc")
xgb.save(bst, 'xgb.model')
bst1 <- xgb.load('xgb.model')
print(xgb.attr(bst1, "my_attribute"))
print(xgb.attributes(bst1))
# deletion:
xgb.attr(bst1, "my_attribute") <- NULL
print(xgb.attributes(bst1))
xgb.attributes(bst1) <- list(a = NULL, b = NULL)
print(xgb.attributes(bst1))
}

View File

@ -0,0 +1,32 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/xgb.Booster.R
\name{xgb.parameters<-}
\alias{xgb.parameters<-}
\title{Accessors for model parameters.}
\usage{
xgb.parameters(object) <- value
}
\arguments{
\item{object}{Object of class \code{xgb.Booster} or \code{xgb.Booster.handle}.}
\item{value}{a list (or an object coercible to a list) with the names of parameters to set
and the elements corresponding to parameter values.}
}
\description{
Only the setter for xgboost parameters is currently implemented.
}
\details{
Note that the setter would usually work more efficiently for \code{xgb.Booster.handle}
than for \code{xgb.Booster}, since only just a handle would need to be copied.
}
\examples{
data(agaricus.train, package='xgboost')
train <- agaricus.train
bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
eta = 1, nthread = 2, nround = 2, objective = "binary:logistic")
xgb.parameters(bst) <- list(eta = 0.1)
}

View File

@ -390,9 +390,30 @@ SEXP XGBoosterGetAttr_R(SEXP handle, SEXP name) {
SEXP XGBoosterSetAttr_R(SEXP handle, SEXP name, SEXP val) {
R_API_BEGIN();
const char *v = isNull(val) ? nullptr : CHAR(asChar(val));
CHECK_CALL(XGBoosterSetAttr(R_ExternalPtrAddr(handle),
CHAR(asChar(name)),
CHAR(asChar(val))));
CHAR(asChar(name)), v));
R_API_END();
return R_NilValue;
}
SEXP XGBoosterGetAttrNames_R(SEXP handle) {
SEXP out;
R_API_BEGIN();
bst_ulong len;
const char **res;
CHECK_CALL(XGBoosterGetAttrNames(R_ExternalPtrAddr(handle),
&len, &res));
if (len > 0) {
out = PROTECT(allocVector(STRSXP, len));
for (size_t i = 0; i < len; ++i) {
SET_STRING_ELT(out, i, mkChar(res[i]));
}
} else {
out = PROTECT(R_NilValue);
}
UNPROTECT(1);
R_API_END();
return out;
}

View File

@ -198,9 +198,15 @@ XGB_DLL SEXP XGBoosterGetAttr_R(SEXP handle, SEXP name);
* \brief set learner attribute value
* \param handle handle
* \param name attribute name
* \param val attribute value
* \param val attribute value; NULL value would delete an attribute
* \return R_NilValue
*/
XGB_DLL SEXP XGBoosterSetAttr_R(SEXP handle, SEXP name, SEXP val);
/*!
* \brief get the names of learner attributes
* \return string vector containing attribute names
*/
XGB_DLL SEXP XGBoosterGetAttrNames_R(SEXP handle);
#endif // XGBOOST_WRAPPER_R_H_ // NOLINT(*)

View File

@ -28,15 +28,32 @@ test_that("xgb.dump works", {
expect_true(xgb.dump(bst.Tree, 'xgb.model.dump', with.stats = T))
})
test_that("xgb.attr", {
test_that("xgb-attribute functionality", {
val <- "my attribute value"
list.val <- list(my_attr=val, a=123, b='ok')
list.ch <- list.val[order(names(list.val))]
list.ch <- lapply(list.ch, as.character)
# proper input:
expect_error(xgb.attr(bst.Tree, NULL))
expect_error(xgb.attr(val, val))
# set & get:
expect_null(xgb.attr(bst.Tree, "asdf"))
expect_null(xgb.attributes(bst.Tree)) # initially, expect no attributes
xgb.attr(bst.Tree, "my_attr") <- val
expect_equal(xgb.attr(bst.Tree, "my_attr"), val)
xgb.attributes(bst.Tree) <- list.val
expect_equal(xgb.attributes(bst.Tree), list.ch)
# serializing:
xgb.save(bst.Tree, 'xgb.model')
bst1 <- xgb.load('xgb.model')
expect_equal(xgb.attr(bst1, "my_attr"), val)
bst <- xgb.load('xgb.model')
expect_equal(xgb.attr(bst, "my_attr"), val)
expect_equal(xgb.attributes(bst), list.ch)
# deletion:
xgb.attr(bst, "my_attr") <- NULL
expect_null(xgb.attr(bst, "my_attr"))
expect_equal(xgb.attributes(bst), list.ch[c("a", "b")])
xgb.attributes(bst) <- list(a=NULL, b=NULL)
expect_null(xgb.attributes(bst))
})
test_that("xgb.model.dt.tree works with and without feature names", {

View File

@ -433,16 +433,27 @@ XGB_DLL int XGBoosterGetAttr(BoosterHandle handle,
const char** out,
int *success);
/*!
* \brief Set string attribute.
* \brief Set or delete string attribute.
*
* \param handle handle
* \param key The key of the symbol.
* \param value The value to be saved.
* \param key The key of the attribute.
* \param value The value to be saved.
* If nullptr, the attribute would be deleted.
* \return 0 when success, -1 when failure happens
*/
XGB_DLL int XGBoosterSetAttr(BoosterHandle handle,
const char* key,
const char* value);
/*!
* \brief Get the names of all attribute from Booster.
* \param handle handle
* \param out_len the argument to hold the output length
* \param out pointer to hold the output attribute stings
* \return 0 when success, -1 when failure happens
*/
XGB_DLL int XGBoosterGetAttrNames(BoosterHandle handle,
bst_ulong* out_len,
const char*** out);
// --- Distributed training API----
// NOTE: functions in rabit/c_api.h will be also available in libxgboost.so

View File

@ -121,9 +121,20 @@ class Learner : public rabit::Serializable {
* The property will be saved along the booster.
* \param key The key of the attribute.
* \param out The output value.
* \return Whether the key is contained in the attribute.
* \return Whether the key exists among booster's attributes.
*/
virtual bool GetAttr(const std::string& key, std::string* out) const = 0;
/*!
* \brief Delete an attribute from the booster.
* \param key The key of the attribute.
* \return Whether the key was found among booster's attributes.
*/
virtual bool DelAttr(const std::string& key) = 0;
/*!
* \brief Get a vector of attribute names from the booster.
* \return vector of attribute name strings.
*/
virtual std::vector<std::string> GetAttrNames() const = 0;
/*!
* \return whether the model allow lazy checkpoint in rabit.
*/

View File

@ -709,19 +709,38 @@ class Booster(object):
else:
return None
def attributes(self):
"""Get attributes stored in the Booster as a dictionary.
Returns
-------
result : dictionary of attribute_name: attribute_value pairs of strings.
Returns an empty dict if there's no attributes.
"""
length = ctypes.c_ulong()
sarr = ctypes.POINTER(ctypes.c_char_p)()
_check_call(_LIB.XGBoosterGetAttrNames(self.handle,
ctypes.byref(length),
ctypes.byref(sarr)))
attr_names = from_cstr_to_pystr(sarr, length)
res = {n: self.attr(n) for n in attr_names}
return res
def set_attr(self, **kwargs):
"""Set the attribute of the Booster.
Parameters
----------
**kwargs
The attributes to set
The attributes to set. Setting a value to None deletes an attribute.
"""
for key, value in kwargs.items():
if not isinstance(value, STRING_TYPES):
raise ValueError("Set Attr only accepts string values")
if value is not None:
if not isinstance(value, STRING_TYPES):
raise ValueError("Set Attr only accepts string values")
value = c_str(str(value))
_check_call(_LIB.XGBoosterSetAttr(
self.handle, c_str(key), c_str(str(value))))
self.handle, c_str(key), value))
def set_param(self, params, value=None):
"""Set parameters into the Booster.

View File

@ -685,7 +685,28 @@ int XGBoosterSetAttr(BoosterHandle handle,
const char* value) {
Booster* bst = static_cast<Booster*>(handle);
API_BEGIN();
bst->learner()->SetAttr(key, value);
if (value == nullptr) {
bst->learner()->DelAttr(key);
} else {
bst->learner()->SetAttr(key, value);
}
API_END();
}
int XGBoosterGetAttrNames(BoosterHandle handle,
bst_ulong* out_len,
const char*** out) {
std::vector<std::string>& str_vecs = XGBAPIThreadLocalStore::Get()->ret_vec_str;
std::vector<const char*>& charp_vecs = XGBAPIThreadLocalStore::Get()->ret_vec_charp;
Booster *bst = static_cast<Booster*>(handle);
API_BEGIN();
str_vecs = bst->learner()->GetAttrNames();
charp_vecs.resize(str_vecs.size());
for (size_t i = 0; i < str_vecs.size(); ++i) {
charp_vecs[i] = str_vecs[i].c_str();
}
*out = dmlc::BeginPtr(charp_vecs);
*out_len = static_cast<bst_ulong>(charp_vecs.size());
API_END();
}

View File

@ -330,6 +330,22 @@ class LearnerImpl : public Learner {
return true;
}
bool DelAttr(const std::string& key) override {
auto it = attributes_.find(key);
if (it == attributes_.end()) return false;
attributes_.erase(it);
return true;
}
std::vector<std::string> GetAttrNames() const override {
std::vector<std::string> out;
out.reserve(attributes_.size());
for (auto& p : attributes_) {
out.push_back(p.first);
}
return out;
}
std::pair<std::string, float> Evaluate(DMatrix* data, std::string metric) {
if (metric == "auto") metric = obj_->DefaultEvalMetric();
std::unique_ptr<Metric> ev(Metric::Create(metric.c_str()));