From ea9285dd4fbb03593770330a96c7deae6a4de79e Mon Sep 17 00:00:00 2001 From: Vadim Khotilovich Date: Sat, 14 May 2016 18:09:21 -0500 Subject: [PATCH 1/6] methods to delete an attribute and get names of available attributes --- include/xgboost/c_api.h | 17 ++++++++++++++--- include/xgboost/learner.h | 13 ++++++++++++- src/c_api/c_api.cc | 23 ++++++++++++++++++++++- src/learner.cc | 16 ++++++++++++++++ 4 files changed, 64 insertions(+), 5 deletions(-) diff --git a/include/xgboost/c_api.h b/include/xgboost/c_api.h index 1598aac3b..e6c09cd0b 100644 --- a/include/xgboost/c_api.h +++ b/include/xgboost/c_api.h @@ -433,16 +433,27 @@ XGB_DLL int XGBoosterGetAttr(BoosterHandle handle, const char** out, int *success); /*! - * \brief Set string attribute. + * \brief Set or delete string attribute. * * \param handle handle - * \param key The key of the symbol. - * \param value The value to be saved. + * \param key The key of the attribute. + * \param value The value to be saved. + * If nullptr, the attribute would be deleted. * \return 0 when success, -1 when failure happens */ XGB_DLL int XGBoosterSetAttr(BoosterHandle handle, const char* key, const char* value); +/*! + * \brief Get the names of all attribute from Booster. + * \param handle handle + * \param len the argument to hold the output length + * \param out pointer to hold the output attribute stings + * \return 0 when success, -1 when failure happens + */ +XGB_DLL int XGBoosterGetAttrNames(BoosterHandle handle, + bst_ulong* out_len, + const char*** out); // --- Distributed training API---- // NOTE: functions in rabit/c_api.h will be also available in libxgboost.so diff --git a/include/xgboost/learner.h b/include/xgboost/learner.h index 18c782518..474437bf2 100644 --- a/include/xgboost/learner.h +++ b/include/xgboost/learner.h @@ -121,9 +121,20 @@ class Learner : public rabit::Serializable { * The property will be saved along the booster. * \param key The key of the attribute. * \param out The output value. - * \return Whether the key is contained in the attribute. + * \return Whether the key exists among booster's attributes. */ virtual bool GetAttr(const std::string& key, std::string* out) const = 0; + /*! + * \brief Delete an attribute from the booster. + * \param key The key of the attribute. + * \return Whether the key was found among booster's attributes. + */ + virtual bool DelAttr(const std::string& key) = 0; + /*! + * \brief Get a vector of attribute names from the booster. + * \return vector of attribute name strings. + */ + virtual std::vector GetAttrNames() const = 0; /*! * \return whether the model allow lazy checkpoint in rabit. */ diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index cf6be2046..37fb92c24 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -685,7 +685,28 @@ int XGBoosterSetAttr(BoosterHandle handle, const char* value) { Booster* bst = static_cast(handle); API_BEGIN(); - bst->learner()->SetAttr(key, value); + if (value == nullptr) { + bst->learner()->DelAttr(key); + } else { + bst->learner()->SetAttr(key, value); + } + API_END(); +} + +int XGBoosterGetAttrNames(BoosterHandle handle, + bst_ulong* out_len, + const char*** out) { + std::vector& str_vecs = XGBAPIThreadLocalStore::Get()->ret_vec_str; + std::vector& charp_vecs = XGBAPIThreadLocalStore::Get()->ret_vec_charp; + Booster *bst = static_cast(handle); + API_BEGIN(); + str_vecs = bst->learner()->GetAttrNames(); + charp_vecs.resize(str_vecs.size()); + for (size_t i = 0; i < str_vecs.size(); ++i) { + charp_vecs[i] = str_vecs[i].c_str(); + } + *out = dmlc::BeginPtr(charp_vecs); + *out_len = static_cast(charp_vecs.size()); API_END(); } diff --git a/src/learner.cc b/src/learner.cc index a88f967c4..971fa45bd 100644 --- a/src/learner.cc +++ b/src/learner.cc @@ -330,6 +330,22 @@ class LearnerImpl : public Learner { return true; } + bool DelAttr(const std::string& key) override { + auto it = attributes_.find(key); + if (it == attributes_.end()) return false; + attributes_.erase(it); + return true; + } + + std::vector GetAttrNames() const override { + std::vector out; + out.reserve(attributes_.size()); + for(auto &p: attributes_) { + out.push_back(p.first); + } + return out; + } + std::pair Evaluate(DMatrix* data, std::string metric) { if (metric == "auto") metric = obj_->DefaultEvalMetric(); std::unique_ptr ev(Metric::Create(metric.c_str())); From 8664217a5ac34e50c74e137629ae4cbad6d0227e Mon Sep 17 00:00:00 2001 From: Vadim Khotilovich Date: Sat, 14 May 2016 18:11:29 -0500 Subject: [PATCH 2/6] [R] more attribute handling functionality --- R-package/NAMESPACE | 3 + R-package/R/xgb.Booster.R | 191 ++++++++++++++++++------ R-package/man/xgb.attr.Rd | 55 +++++-- R-package/man/xgb.parameters.Rd | 32 ++++ R-package/src/xgboost_R.cc | 25 +++- R-package/src/xgboost_R.h | 8 +- R-package/tests/testthat/test_helpers.R | 23 ++- 7 files changed, 273 insertions(+), 64 deletions(-) create mode 100644 R-package/man/xgb.parameters.Rd diff --git a/R-package/NAMESPACE b/R-package/NAMESPACE index 349a1a679..f9f602ce7 100644 --- a/R-package/NAMESPACE +++ b/R-package/NAMESPACE @@ -10,6 +10,8 @@ S3method(predict,xgb.Booster.handle) S3method(setinfo,xgb.DMatrix) S3method(slice,xgb.DMatrix) export("xgb.attr<-") +export("xgb.attributes<-") +export("xgb.parameters<-") export(getinfo) export(print.xgb.DMatrix) export(setinfo) @@ -17,6 +19,7 @@ export(slice) export(xgb.DMatrix) export(xgb.DMatrix.save) export(xgb.attr) +export(xgb.attributes) export(xgb.create.features) export(xgb.cv) export(xgb.dump) diff --git a/R-package/R/xgb.Booster.R b/R-package/R/xgb.Booster.R index 0c17c0ebb..3005d3ea6 100644 --- a/R-package/R/xgb.Booster.R +++ b/R-package/R/xgb.Booster.R @@ -2,31 +2,28 @@ # internal utility function xgb.Booster <- function(params = list(), cachelist = list(), modelfile = NULL) { if (typeof(cachelist) != "list") { - stop("xgb.Booster: only accepts list of DMatrix as cachelist") + stop("xgb.Booster only accepts list of DMatrix as cachelist") } for (dm in cachelist) { if (class(dm) != "xgb.DMatrix") { - stop("xgb.Booster: only accepts list of DMatrix as cachelist") + stop("xgb.Booster only accepts list of DMatrix as cachelist") } } handle <- .Call("XGBoosterCreate_R", cachelist, PACKAGE = "xgboost") - if (length(params) != 0) { - for (i in 1:length(params)) { - p <- params[i] - .Call("XGBoosterSetParam_R", handle, gsub("\\.", "_", names(p)), as.character(p), - PACKAGE = "xgboost") - } - } if (!is.null(modelfile)) { if (typeof(modelfile) == "character") { .Call("XGBoosterLoadModel_R", handle, modelfile, PACKAGE = "xgboost") } else if (typeof(modelfile) == "raw") { .Call("XGBoosterLoadModelFromRaw_R", handle, modelfile, PACKAGE = "xgboost") } else { - stop("xgb.Booster: modelfile must be character or raw vector") + stop("modelfile must be character or raw vector") } } - return(structure(handle, class = "xgb.Booster.handle")) + class(handle) <- "xgb.Booster.handle" + if (length(params) > 0) { + xgb.parameters(handle) <- params + } + return(handle) } # Convert xgb.Booster.handle to xgb.Booster @@ -38,6 +35,20 @@ xgb.handleToBooster <- function(handle, raw = NULL) return(bst) } +# Return a verified to be valid handle out of either xgb.Booster.handle or xgb.Booster +# internal utility function +xgb.get.handle <- function(object) { + handle <- switch(class(object)[1], + xgb.Booster = object$handle, + xgb.Booster.handle = object, + stop("argument must be of either xgb.Booster or xgb.Booster.handle class") + ) + if (is.null(handle) | .Call("XGCheckNullPtr_R", handle, PACKAGE="xgboost")) { + stop("invalid xgb.Booster.handle") + } + handle +} + # Check whether an xgb.Booster object is complete # internal utility function xgb.Booster.check <- function(bst, saveraw = TRUE) @@ -146,28 +157,43 @@ predict.xgb.Booster.handle <- function(object, ...) { #' Accessors for serializable attributes of a model. #' -#' These methods allow to manipulate key-value attribute strings of an xgboost model. +#' These methods allow to manipulate the key-value attribute strings of an xgboost model. #' -#' @param object Object of class \code{xgb.Booster} or \code{xgb.Booster.handle}. -#' @param which a non-empty character string specifying which attribute is to be accessed. -#' @param value a value of an attribute. Non-character values are converted to character. -#' When length of a \code{value} vector is more than one, only the first element is used. +#' @param object Object of class \code{xgb.Booster} or \code{xgb.Booster.handle}. +#' @param name a non-empty character string specifying which attribute is to be accessed. +#' @param value a value of an attribute for \code{xgb.attr<-}; for \code{xgb.attributes<-} +#' it's a list (or an object coercible to a list) with the names of attributes to set +#' and the elements corresponding to attribute values. +#' Non-character values are converted to character. +#' When attribute value is not a scalar, only the first index is used. +#' Use \code{NULL} to remove an attribute. #' #' @details -#' Note that the xgboost model attributes are a separate concept from the attributes in R. -#' Specifically, they refer to key-value strings that can be attached to an xgboost model -#' and stored within the model's binary representation. +#' The primary purpose of xgboost model attributes is to store some meta-data about the model. +#' Note that they are a separate concept from the object attributes in R. +#' Specifically, they refer to key-value strings that can be attached to an xgboost model, +#' stored together with the model's binary representation, and accessed later +#' (from R or any other interface). #' In contrast, any R-attribute assigned to an R-object of \code{xgb.Booster} class -#' would not be saved by \code{xgb.save}, since xgboost model is an external memory object +#' would not be saved by \code{xgb.save} because an xgboost model is an external memory object #' and its serialization is handled extrnally. +#' Also, setting an attribute that has the same name as one of xgboost's parameters wouldn't +#' change the value of that parameter for a model. +#' Use \code{\link{`xgb.parameters<-`}} to set or change model parameters. #' -#' Also note that the attribute setter would usually work more efficiently for \code{xgb.Booster.handle} -#' than for \code{xgb.Booster}, since only just a handle would need to be copied. +#' The attribute setters would usually work more efficiently for \code{xgb.Booster.handle} +#' than for \code{xgb.Booster}, since only just a handle (pointer) would need to be copied. +#' +#' The \code{xgb.attributes<-} setter either updates the existing or adds one or several attributes, +#' but doesn't delete the existing attributes which don't have their names in \code{names(attributes)}. #' #' @return #' \code{xgb.attr} returns either a string value of an attribute #' or \code{NULL} if an attribute wasn't stored in a model. #' +#' \code{xgb.attributes} returns a list of all attribute stored in a model +#' or \code{NULL} if a model has no stored attributes. +#' #' @examples #' data(agaricus.train, package='xgboost') #' train <- agaricus.train @@ -177,42 +203,117 @@ predict.xgb.Booster.handle <- function(object, ...) { #' #' xgb.attr(bst, "my_attribute") <- "my attribute value" #' print(xgb.attr(bst, "my_attribute")) +#' xgb.attributes(bst) <- list(a = 123, b = "abc") #' #' xgb.save(bst, 'xgb.model') #' bst1 <- xgb.load('xgb.model') #' print(xgb.attr(bst1, "my_attribute")) -#' +#' print(xgb.attributes(bst1)) +#' +#' # deletion: +#' xgb.attr(bst1, "my_attribute") <- NULL +#' print(xgb.attributes(bst1)) +#' xgb.attributes(bst1) <- list(a = NULL, b = NULL) +#' print(xgb.attributes(bst1)) +#' #' @rdname xgb.attr #' @export -xgb.attr <- function(object, which) { - if (is.null(which) | nchar(as.character(which)[1]) == 0) stop("invalid attribute name") - handle = xgb.get.handle(object, "xgb.attr") - .Call("XGBoosterGetAttr_R", handle, as.character(which)[1], PACKAGE="xgboost") +xgb.attr <- function(object, name) { + if (is.null(name) || nchar(as.character(name[1])) == 0) stop("invalid attribute name") + handle <- xgb.get.handle(object) + .Call("XGBoosterGetAttr_R", handle, as.character(name[1]), PACKAGE="xgboost") } #' @rdname xgb.attr #' @export -`xgb.attr<-` <- function(object, which, value) { - if (is.null(which) | nchar(as.character(which)[1]) == 0) stop("invalid attribute name") - handle = xgb.get.handle(object, "xgb.attr") - # TODO: setting NULL value to remove an attribute - .Call("XGBoosterSetAttr_R", handle, as.character(which)[1], as.character(value)[1], PACKAGE="xgboost") +`xgb.attr<-` <- function(object, name, value) { + if (is.null(name) || nchar(as.character(name[1])) == 0) stop("invalid attribute name") + handle <- xgb.get.handle(object) + if (!is.null(value)) { + # Coerce the elements to be scalar strings. + # Q: should we warn user about non-scalar elements? + value <- as.character(value[1]) + } + .Call("XGBoosterSetAttr_R", handle, as.character(name[1]), value, PACKAGE="xgboost") if (is(object, 'xgb.Booster') && !is.null(object$raw)) { - object$raw <- xgb.save.raw(object$handle) + object$raw <- xgb.save.raw(object$handle) } object } -# Return a valid handle out of either xgb.Booster.handle or xgb.Booster -# internal utility function -xgb.get.handle <- function(object, caller="") { - handle = switch(class(object), - xgb.Booster = object$handle, - xgb.Booster.handle = object, - stop(caller, ": argument must be either xgb.Booster or xgb.Booster.handle") - ) - if (is.null(handle) | .Call("XGCheckNullPtr_R", handle, PACKAGE="xgboost")) { - stop(caller, ": invalid xgb.Booster.handle") - } - handle +#' @rdname xgb.attr +#' @export +xgb.attributes <- function(object) { + handle <- xgb.get.handle(object) + attr_names <- .Call("XGBoosterGetAttrNames_R", handle, PACKAGE="xgboost") + if (is.null(attr_names)) return(NULL) + res <- lapply(attr_names, function(x) { + .Call("XGBoosterGetAttr_R", handle, x, PACKAGE="xgboost") + }) + names(res) <- attr_names + res +} + +#' @rdname xgb.attr +#' @export +`xgb.attributes<-` <- function(object, value) { + a <- as.list(value) + if (is.null(names(a)) || any(nchar(names(a)) == 0)) { + stop("attribute names cannot be empty strings") + } + # Coerce the elements to be scalar strings. + # Q: should we warn a user about non-scalar elements? + a <- lapply(a, function(x) { + if (is.null(x)) return(NULL) + as.character(x[1]) + }) + handle <- xgb.get.handle(object) + for (i in seq_along(a)) { + .Call("XGBoosterSetAttr_R", handle, names(a[i]), a[[i]], PACKAGE="xgboost") + } + if (is(object, 'xgb.Booster') && !is.null(object$raw)) { + object$raw <- xgb.save.raw(object$handle) + } + object +} + +#' Accessors for model parameters. +#' +#' Only the setter for xgboost parameters is currently implemented. +#' +#' @param object Object of class \code{xgb.Booster} or \code{xgb.Booster.handle}. +#' @param value a list (or an object coercible to a list) with the names of parameters to set +#' and the elements corresponding to parameter values. +#' +#' @details +#' Note that the setter would usually work more efficiently for \code{xgb.Booster.handle} +#' than for \code{xgb.Booster}, since only just a handle would need to be copied. +#' +#' @examples +#' data(agaricus.train, package='xgboost') +#' train <- agaricus.train +#' +#' bst <- xgboost(data = train$data, label = train$label, max.depth = 2, +#' eta = 1, nthread = 2, nround = 2, objective = "binary:logistic") +#' +#' xgb.parameters(bst) <- list(eta = 0.1) +#' +#' @rdname xgb.parameters +#' @export +`xgb.parameters<-` <- function(object, value) { + if (length(value) == 0) return(object) + p <- as.list(value) + if (is.null(names(p)) || any(nchar(names(p)) == 0)) { + stop("parameter names cannot be empty strings") + } + names(p) <- gsub("\\.", "_", names(p)) + p <- lapply(p, function(x) as.character(x)[1]) + handle <- xgb.get.handle(object) + for (i in seq_along(p)) { + .Call("XGBoosterSetParam_R", handle, names(p[i]), p[[i]], PACKAGE = "xgboost") + } + if (is(object, 'xgb.Booster') && !is.null(object$raw)) { + object$raw <- xgb.save.raw(object$handle) + } + object } diff --git a/R-package/man/xgb.attr.Rd b/R-package/man/xgb.attr.Rd index 85cfd6df7..3429da958 100644 --- a/R-package/man/xgb.attr.Rd +++ b/R-package/man/xgb.attr.Rd @@ -3,37 +3,58 @@ \name{xgb.attr} \alias{xgb.attr} \alias{xgb.attr<-} +\alias{xgb.attributes} +\alias{xgb.attributes<-} \title{Accessors for serializable attributes of a model.} \usage{ -xgb.attr(object, which) +xgb.attr(object, name) -xgb.attr(object, which) <- value +xgb.attr(object, name) <- value + +xgb.attributes(object) + +xgb.attributes(object) <- value } \arguments{ -\item{object}{Object of class \code{xgb.Booster} or \code{xgb.Booster.handle}.} +\item{object}{Object of class \code{xgb.Booster} or \code{xgb.Booster.handle}.} -\item{which}{a non-empty character string specifying which attribute is to be accessed.} +\item{name}{a non-empty character string specifying which attribute is to be accessed.} -\item{value}{a value of an attribute. Non-character values are converted to character. -When length of a \code{value} vector is more than one, only the first element is used.} +\item{value}{a value of an attribute for \code{xgb.attr<-}; for \code{xgb.attributes<-} +it's a list (or an object coercible to a list) with the names of attributes to set +and the elements corresponding to attribute values. +Non-character values are converted to character. +When attribute value is not a scalar, only the first index is used. +Use \code{NULL} to remove an attribute.} } \value{ \code{xgb.attr} returns either a string value of an attribute or \code{NULL} if an attribute wasn't stored in a model. + +\code{xgb.attributes} returns a list of all attribute stored in a model +or \code{NULL} if a model has no stored attributes. } \description{ -These methods allow to manipulate key-value attribute strings of an xgboost model. +These methods allow to manipulate the key-value attribute strings of an xgboost model. } \details{ -Note that the xgboost model attributes are a separate concept from the attributes in R. -Specifically, they refer to key-value strings that can be attached to an xgboost model -and stored within the model's binary representation. +The primary purpose of xgboost model attributes is to store some meta-data about the model. +Note that they are a separate concept from the object attributes in R. +Specifically, they refer to key-value strings that can be attached to an xgboost model, +stored together with the model's binary representation, and accessed later +(from R or any other interface). In contrast, any R-attribute assigned to an R-object of \code{xgb.Booster} class -would not be saved by \code{xgb.save}, since xgboost model is an external memory object +would not be saved by \code{xgb.save} because an xgboost model is an external memory object and its serialization is handled extrnally. +Also, setting an attribute that has the same name as one of xgboost's parameters wouldn't +change the value of that parameter for a model. +Use \code{\link{`xgb.parameters<-`}} to set or change model parameters. -Also note that the attribute setter would usually work more efficiently for \code{xgb.Booster.handle} -than for \code{xgb.Booster}, since only just a handle would need to be copied. +The attribute setters would usually work more efficiently for \code{xgb.Booster.handle} +than for \code{xgb.Booster}, since only just a handle (pointer) would need to be copied. + +The \code{xgb.attributes<-} setter either updates the existing or adds one or several attributes, +but doesn't delete the existing attributes which don't have their names in \code{names(attributes)}. } \examples{ data(agaricus.train, package='xgboost') @@ -44,10 +65,18 @@ bst <- xgboost(data = train$data, label = train$label, max.depth = 2, xgb.attr(bst, "my_attribute") <- "my attribute value" print(xgb.attr(bst, "my_attribute")) +xgb.attributes(bst) <- list(a = 123, b = "abc") xgb.save(bst, 'xgb.model') bst1 <- xgb.load('xgb.model') print(xgb.attr(bst1, "my_attribute")) +print(xgb.attributes(bst1)) + +# deletion: +xgb.attr(bst1, "my_attribute") <- NULL +print(xgb.attributes(bst1)) +xgb.attributes(bst1) <- list(a = NULL, b = NULL) +print(xgb.attributes(bst1)) } diff --git a/R-package/man/xgb.parameters.Rd b/R-package/man/xgb.parameters.Rd new file mode 100644 index 000000000..e531b5668 --- /dev/null +++ b/R-package/man/xgb.parameters.Rd @@ -0,0 +1,32 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/xgb.Booster.R +\name{xgb.parameters<-} +\alias{xgb.parameters<-} +\title{Accessors for model parameters.} +\usage{ +xgb.parameters(object) <- value +} +\arguments{ +\item{object}{Object of class \code{xgb.Booster} or \code{xgb.Booster.handle}.} + +\item{value}{a list (or an object coercible to a list) with the names of parameters to set +and the elements corresponding to parameter values.} +} +\description{ +Only the setter for xgboost parameters is currently implemented. +} +\details{ +Note that the setter would usually work more efficiently for \code{xgb.Booster.handle} +than for \code{xgb.Booster}, since only just a handle would need to be copied. +} +\examples{ +data(agaricus.train, package='xgboost') +train <- agaricus.train + +bst <- xgboost(data = train$data, label = train$label, max.depth = 2, + eta = 1, nthread = 2, nround = 2, objective = "binary:logistic") + +xgb.parameters(bst) <- list(eta = 0.1) + +} + diff --git a/R-package/src/xgboost_R.cc b/R-package/src/xgboost_R.cc index dcf1228ce..315d93d23 100644 --- a/R-package/src/xgboost_R.cc +++ b/R-package/src/xgboost_R.cc @@ -390,9 +390,30 @@ SEXP XGBoosterGetAttr_R(SEXP handle, SEXP name) { SEXP XGBoosterSetAttr_R(SEXP handle, SEXP name, SEXP val) { R_API_BEGIN(); + const char *v = isNull(val) ? nullptr : CHAR(asChar(val)); CHECK_CALL(XGBoosterSetAttr(R_ExternalPtrAddr(handle), - CHAR(asChar(name)), - CHAR(asChar(val)))); + CHAR(asChar(name)), v)); R_API_END(); return R_NilValue; } + +SEXP XGBoosterGetAttrNames_R(SEXP handle) { + SEXP out; + R_API_BEGIN(); + bst_ulong len; + const char **res; + CHECK_CALL(XGBoosterGetAttrNames(R_ExternalPtrAddr(handle), + &len, &res)); + if (len > 0) { + out = PROTECT(allocVector(STRSXP, len)); + for (size_t i = 0; i < len; ++i) { + SET_STRING_ELT(out, i, mkChar(res[i])); + } + } else { + out = PROTECT(R_NilValue); + } + UNPROTECT(1); + R_API_END(); + return out; +} + diff --git a/R-package/src/xgboost_R.h b/R-package/src/xgboost_R.h index daf304529..b78a69b67 100644 --- a/R-package/src/xgboost_R.h +++ b/R-package/src/xgboost_R.h @@ -198,9 +198,15 @@ XGB_DLL SEXP XGBoosterGetAttr_R(SEXP handle, SEXP name); * \brief set learner attribute value * \param handle handle * \param name attribute name - * \param val attribute value + * \param val attribute value; NULL value would delete an attribute * \return R_NilValue */ XGB_DLL SEXP XGBoosterSetAttr_R(SEXP handle, SEXP name, SEXP val); +/*! + * \brief get the names of learner attributes + * \return string vector containing attribute names + */ +XGB_DLL SEXP XGBoosterGetAttrNames_R(SEXP handle); + #endif // XGBOOST_WRAPPER_R_H_ // NOLINT(*) diff --git a/R-package/tests/testthat/test_helpers.R b/R-package/tests/testthat/test_helpers.R index 14a04998e..b5fb264e3 100644 --- a/R-package/tests/testthat/test_helpers.R +++ b/R-package/tests/testthat/test_helpers.R @@ -28,15 +28,32 @@ test_that("xgb.dump works", { expect_true(xgb.dump(bst.Tree, 'xgb.model.dump', with.stats = T)) }) -test_that("xgb.attr", { +test_that("xgb-attribute functionality", { val <- "my attribute value" + list.val <- list(my_attr=val, a=123, b='ok') + list.ch <- list.val[order(names(list.val))] + list.ch <- lapply(list.ch, as.character) + # proper input: expect_error(xgb.attr(bst.Tree, NULL)) expect_error(xgb.attr(val, val)) + # set & get: + expect_null(xgb.attr(bst.Tree, "asdf")) + expect_null(xgb.attributes(bst.Tree)) # initially, expect no attributes xgb.attr(bst.Tree, "my_attr") <- val expect_equal(xgb.attr(bst.Tree, "my_attr"), val) + xgb.attributes(bst.Tree) <- list.val + expect_equal(xgb.attributes(bst.Tree), list.ch) + # serializing: xgb.save(bst.Tree, 'xgb.model') - bst1 <- xgb.load('xgb.model') - expect_equal(xgb.attr(bst1, "my_attr"), val) + bst <- xgb.load('xgb.model') + expect_equal(xgb.attr(bst, "my_attr"), val) + expect_equal(xgb.attributes(bst), list.ch) + # deletion: + xgb.attr(bst, "my_attr") <- NULL + expect_null(xgb.attr(bst, "my_attr")) + expect_equal(xgb.attributes(bst), list.ch[c("a", "b")]) + xgb.attributes(bst) <- list(a=NULL, b=NULL) + expect_null(xgb.attributes(bst)) }) test_that("xgb.model.dt.tree works with and without feature names", { From a13a3a4d76a169db832d556aa676807bce168d4d Mon Sep 17 00:00:00 2001 From: Vadim Khotilovich Date: Sun, 15 May 2016 02:05:10 -0500 Subject: [PATCH 3/6] attr_names for python interface; attribute deletion via set_attr --- python-package/xgboost/core.py | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/python-package/xgboost/core.py b/python-package/xgboost/core.py index b15672473..aad8b9266 100644 --- a/python-package/xgboost/core.py +++ b/python-package/xgboost/core.py @@ -709,19 +709,37 @@ class Booster(object): else: return None + def attr_names(self): + """Get the names of attributes stored in the Booster. + + Returns + ------- + value : list of strings + Returns an empty list if there's no attributes. + """ + length = ctypes.c_ulong() + sarr = ctypes.POINTER(ctypes.c_char_p)() + _check_call(_LIB.XGBoosterGetAttrNames(self.handle, + ctypes.byref(length), + ctypes.byref(sarr))) + res = from_cstr_to_pystr(sarr, length) + return res + def set_attr(self, **kwargs): """Set the attribute of the Booster. Parameters ---------- **kwargs - The attributes to set + The attributes to set. Setting a value to None deletes an attribute. """ for key, value in kwargs.items(): - if not isinstance(value, STRING_TYPES): - raise ValueError("Set Attr only accepts string values") + if value is not None: + if not isinstance(value, STRING_TYPES): + raise ValueError("Set Attr only accepts string values") + value = c_str(str(value)) _check_call(_LIB.XGBoosterSetAttr( - self.handle, c_str(key), c_str(str(value)))) + self.handle, c_str(key), value)) def set_param(self, params, value=None): """Set parameters into the Booster. From 185fef3fce54d7625e8ae42ff5f4fe3fb3c7b705 Mon Sep 17 00:00:00 2001 From: Vadim Khotilovich Date: Sun, 15 May 2016 02:35:37 -0500 Subject: [PATCH 4/6] fixes for lint --- src/learner.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/learner.cc b/src/learner.cc index 971fa45bd..44e56c732 100644 --- a/src/learner.cc +++ b/src/learner.cc @@ -340,7 +340,7 @@ class LearnerImpl : public Learner { std::vector GetAttrNames() const override { std::vector out; out.reserve(attributes_.size()); - for(auto &p: attributes_) { + for (auto& p : attributes_) { out.push_back(p.first); } return out; From 26b36714ea2e99dbc1893afd59c1ab5f4a7aaefa Mon Sep 17 00:00:00 2001 From: Vadim Khotilovich Date: Sun, 15 May 2016 03:05:19 -0500 Subject: [PATCH 5/6] doxygen suggested fix --- include/xgboost/c_api.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/xgboost/c_api.h b/include/xgboost/c_api.h index e6c09cd0b..e75ed2cfe 100644 --- a/include/xgboost/c_api.h +++ b/include/xgboost/c_api.h @@ -447,7 +447,7 @@ XGB_DLL int XGBoosterSetAttr(BoosterHandle handle, /*! * \brief Get the names of all attribute from Booster. * \param handle handle - * \param len the argument to hold the output length + * \param out_len the argument to hold the output length * \param out pointer to hold the output attribute stings * \return 0 when success, -1 when failure happens */ From ffed95eec08ae4470075147a674a5496336629c6 Mon Sep 17 00:00:00 2001 From: Vadim Khotilovich Date: Sun, 15 May 2016 22:04:38 -0500 Subject: [PATCH 6/6] py: replace attr_names() with attributes() --- python-package/xgboost/core.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/python-package/xgboost/core.py b/python-package/xgboost/core.py index aad8b9266..f22ca7ef1 100644 --- a/python-package/xgboost/core.py +++ b/python-package/xgboost/core.py @@ -709,20 +709,21 @@ class Booster(object): else: return None - def attr_names(self): - """Get the names of attributes stored in the Booster. + def attributes(self): + """Get attributes stored in the Booster as a dictionary. Returns ------- - value : list of strings - Returns an empty list if there's no attributes. + result : dictionary of attribute_name: attribute_value pairs of strings. + Returns an empty dict if there's no attributes. """ length = ctypes.c_ulong() sarr = ctypes.POINTER(ctypes.c_char_p)() _check_call(_LIB.XGBoosterGetAttrNames(self.handle, ctypes.byref(length), ctypes.byref(sarr))) - res = from_cstr_to_pystr(sarr, length) + attr_names = from_cstr_to_pystr(sarr, length) + res = {n: self.attr(n) for n in attr_names} return res def set_attr(self, **kwargs):