From f1d7b012a6d3656125faa515eb4fd84a8540116a Mon Sep 17 00:00:00 2001 From: hetong Date: Sat, 6 Sep 2014 11:17:38 -0700 Subject: [PATCH] refine doc, with Rd --- R-package/NAMESPACE | 2 +- R-package/R/getinfo.xgb.DMatrix.R | 5 ++- R-package/R/setinfo.xgb.DMatrix.R | 10 +++-- R-package/R/xgb.cv.R | 9 +++-- R-package/man/getinfo.Rd | 5 ++- R-package/man/setinfo.Rd | 33 ++++++++++++++++ R-package/man/xgb.cv.Rd | 66 +++++++++++++++++++++++++++++++ R-package/man/xgb.train.Rd | 9 +++-- R-package/vignettes/xgboost.Rnw | 5 ++- R-package/vignettes/xgboost.bib | 10 +++++ 10 files changed, 139 insertions(+), 15 deletions(-) create mode 100644 R-package/man/setinfo.Rd create mode 100644 R-package/man/xgb.cv.Rd diff --git a/R-package/NAMESPACE b/R-package/NAMESPACE index a13e64931..a8dc5c7c3 100644 --- a/R-package/NAMESPACE +++ b/R-package/NAMESPACE @@ -5,11 +5,11 @@ export(setinfo) export(slice) export(xgb.DMatrix) export(xgb.DMatrix.save) +export(xgb.cv) export(xgb.dump) export(xgb.load) export(xgb.save) export(xgb.train) -export(xgb.cv) export(xgboost) exportMethods(predict) import(methods) diff --git a/R-package/R/getinfo.xgb.DMatrix.R b/R-package/R/getinfo.xgb.DMatrix.R index 2a7ae8e5e..5a8f4af3c 100644 --- a/R-package/R/getinfo.xgb.DMatrix.R +++ b/R-package/R/getinfo.xgb.DMatrix.R @@ -8,7 +8,10 @@ setClass('xgb.DMatrix') #' data(iris) #' iris[,5] <- as.numeric(iris[,5]=='setosa') #' dtrain <- xgb.DMatrix(as.matrix(iris[,1:4]), label=iris[,5]) -#' labels <- getinfo(dtrain, "label") +#' labels <- getinfo(dtrain, 'label') +#' setinfo(dtrain, 'label', 1-labels) +#' labels2 <- getinfo(dtrain, 'label') +#' stopifnot(all(labels2 == 1-labels)) #' @rdname getinfo #' @export #' diff --git a/R-package/R/setinfo.xgb.DMatrix.R b/R-package/R/setinfo.xgb.DMatrix.R index 0e40ab4fe..91df89c11 100644 --- a/R-package/R/setinfo.xgb.DMatrix.R +++ b/R-package/R/setinfo.xgb.DMatrix.R @@ -6,7 +6,10 @@ #' data(iris) #' iris[,5] <- as.numeric(iris[,5]=='setosa') #' dtrain <- 
xgb.DMatrix(as.matrix(iris[,1:4]), label=iris[,5]) -#' labels <- getinfo(dtrain, "label") +#' labels <- getinfo(dtrain, 'label') +#' setinfo(dtrain, 'label', 1-labels) +#' labels2 <- getinfo(dtrain, 'label') +#' stopifnot(all(labels2 == 1-labels)) #' @rdname setinfo #' @export #' @@ -16,9 +19,10 @@ setinfo <- function(object, ...){ #' @param object Object of class "xgb.DMatrix" #' @param name the name of the field to get +#' @param info the specific field of information to set #' @param ... other parameters -#' @rdname getinfo -#' @method getinfo xgb.DMatrix +#' @rdname setinfo +#' @method setinfo xgb.DMatrix setMethod("setinfo", signature = "xgb.DMatrix", definition = function(object, name, info) { xgb.setinfo(object, name, info) diff --git a/R-package/R/xgb.cv.R b/R-package/R/xgb.cv.R index 9bd0f0468..e2be778a2 100644 --- a/R-package/R/xgb.cv.R +++ b/R-package/R/xgb.cv.R @@ -1,6 +1,6 @@ -#' eXtreme Gradient Boosting Training +#' Cross Validation #' -#' The training function of xgboost +#' The cross validation function of xgboost #' #' @param params the list of parameters. 
Commonly used ones are: #' \itemize{ @@ -61,7 +61,7 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, params <- append(params, list(silent=1)) for (mc in metrics) { params <- append(params, list("eval_metric"=mc)) - } + } folds <- xgb.cv.mknfold(dtrain, nfold, params) history <- list() @@ -70,7 +70,8 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, for (k in 1:nfold) { fd <- folds[[k]] succ <- xgb.iter.update(fd$booster, fd$dtrain, i - 1, obj) - msg[[k]] <- strsplit(xgb.iter.eval(fd$booster, fd$watchlist, i - 1, feval), "\t")[[1]] + msg[[k]] <- strsplit(xgb.iter.eval(fd$booster, fd$watchlist, i - 1, feval), + "\t")[[1]] } ret <- xgb.cv.aggcv(msg, showsd) history <- append(history, ret) diff --git a/R-package/man/getinfo.Rd b/R-package/man/getinfo.Rd index 7206d6b17..e3ef3067d 100644 --- a/R-package/man/getinfo.Rd +++ b/R-package/man/getinfo.Rd @@ -23,6 +23,9 @@ Get information of an xgb.DMatrix object data(iris) iris[,5] <- as.numeric(iris[,5]=='setosa') dtrain <- xgb.DMatrix(as.matrix(iris[,1:4]), label=iris[,5]) -labels <- getinfo(dtrain, "label") +labels <- getinfo(dtrain, 'label') +setinfo(dtrain, 'label', 1-labels) +labels2 <- getinfo(dtrain, 'label') +stopifnot(all(labels2 == 1-labels)) } diff --git a/R-package/man/setinfo.Rd b/R-package/man/setinfo.Rd new file mode 100644 index 000000000..a146d3611 --- /dev/null +++ b/R-package/man/setinfo.Rd @@ -0,0 +1,33 @@ +% Generated by roxygen2 (4.0.1): do not edit by hand +\docType{methods} +\name{setinfo} +\alias{setinfo} +\alias{setinfo,xgb.DMatrix-method} +\title{Set information of an xgb.DMatrix object} +\usage{ +setinfo(object, ...) 
+ +\S4method{setinfo}{xgb.DMatrix}(object, name, info) +} +\arguments{ +\item{object}{Object of class "xgb.DMatrix"} + +\item{name}{the name of the field to get} + +\item{info}{the specific field of information to set} + +\item{...}{other parameters} +} +\description{ +Set information of an xgb.DMatrix object +} +\examples{ +data(iris) +iris[,5] <- as.numeric(iris[,5]=='setosa') +dtrain <- xgb.DMatrix(as.matrix(iris[,1:4]), label=iris[,5]) +labels <- getinfo(dtrain, 'label') +setinfo(dtrain, 'label', 1-labels) +labels2 <- getinfo(dtrain, 'label') +stopifnot(all(labels2 == 1-labels)) +} + diff --git a/R-package/man/xgb.cv.Rd b/R-package/man/xgb.cv.Rd new file mode 100644 index 000000000..10ee6f5f0 --- /dev/null +++ b/R-package/man/xgb.cv.Rd @@ -0,0 +1,66 @@ +% Generated by roxygen2 (4.0.1): do not edit by hand +\name{xgb.cv} +\alias{xgb.cv} +\title{Cross Validation} +\usage{ +xgb.cv(params = list(), data, nrounds, nfold, label = NULL, showsd = TRUE, + metrics = list(), obj = NULL, feval = NULL, ...) +} +\arguments{ +\item{params}{the list of parameters. Commonly used ones are: +\itemize{ + \item \code{objective} objective function, common ones are + \itemize{ + \item \code{reg:linear} linear regression + \item \code{binary:logistic} logistic regression for classification + } + \item \code{eta} step size of each boosting step + \item \code{max_depth} maximum depth of the tree + \item \code{nthread} number of thread used in training, if not set, all threads are used +} + + See \url{https://github.com/tqchen/xgboost/wiki/Parameters} for + further details. 
See also inst/examples/demo.R for walkthrough example in R.} + +\item{data}{takes an \code{xgb.DMatrix} as the input.} + +\item{nrounds}{the max number of iterations} + +\item{nfold}{number of folds used} + +\item{label}{optional field, when data is Matrix} + +\item{showsd}{boolean, whether show standard deviation of cross validation} + +\item{metrics}{list of evaluation metrics to be used in cross validation, + when it is not specified, the evaluation metric is chosen according to objective function. + Possible options are: +\itemize{ + \item \code{error} binary classification error rate + \item \code{rmse} Root mean square error + \item \code{logloss} negative log-likelihood function + \item \code{auc} Area under curve + \item \code{merror} Exact matching error, used to evaluate multi-class classification +}} + +\item{obj}{customized objective function. Returns gradient and second order +gradient with given prediction and dtrain,} + +\item{feval}{customized evaluation function. Returns +\code{list(metric='metric-name', value='metric-value')} with given +prediction and dtrain,} + +\item{...}{other parameters to pass to \code{params}.} +} +\description{ +The cross validation function of xgboost +} +\details{ +This is the cross validation function for xgboost + +Parallelization is automatically enabled if OpenMP is present. +Number of threads can also be manually specified via "nthread" parameter. + +This function only accepts an \code{xgb.DMatrix} object as the input. +} + diff --git a/R-package/man/xgb.train.Rd b/R-package/man/xgb.train.Rd index 75c43cd56..f871dcf65 100644 --- a/R-package/man/xgb.train.Rd +++ b/R-package/man/xgb.train.Rd @@ -3,8 +3,8 @@ \alias{xgb.train} \title{eXtreme Gradient Boosting Training} \usage{ -xgb.train(params = list(), dtrain, nrounds, watchlist = list(), - obj = NULL, feval = NULL, ...) +xgb.train(params = list(), data, nrounds, watchlist = list(), obj = NULL, + feval = NULL, verbose = 1, ...) 
} \arguments{ \item{params}{the list of parameters. Commonly used ones are: @@ -22,7 +22,7 @@ xgb.train(params = list(), dtrain, nrounds, watchlist = list(), See \url{https://github.com/tqchen/xgboost/wiki/Parameters} for further details. See also inst/examples/demo.R for walkthrough example in R.} -\item{dtrain}{takes an \code{xgb.DMatrix} as the input.} +\item{data}{takes an \code{xgb.DMatrix} as the input.} \item{nrounds}{the max number of iterations} @@ -39,6 +39,9 @@ gradient with given prediction and dtrain,} \code{list(metric='metric-name', value='metric-value')} with given prediction and dtrain,} +\item{verbose}{If 0, xgboost will stay silent. If 1, xgboost will print + information of performance. If 2, xgboost will print information of both + performance and construction progress} + \item{...}{other parameters to pass to \code{params}.} } \description{ diff --git a/R-package/vignettes/xgboost.Rnw b/R-package/vignettes/xgboost.Rnw index 45ab1a096..6d2d0640e 100644 --- a/R-package/vignettes/xgboost.Rnw +++ b/R-package/vignettes/xgboost.Rnw @@ -73,9 +73,10 @@ and ranking. The package is made to be extendible, so that users are also allowe \end{enumerate} -\section{Example with iris} +\section{Example with Mushroom data} -In this section, we will illustrate some common usage of \verb@xgboost@. +In this section, we will illustrate some common usage of \verb@xgboost@. The +Mushroom data is cited from UCI Machine Learning Repository. \citep{Bache+Lichman:2013} <>= library(xgboost) diff --git a/R-package/vignettes/xgboost.bib b/R-package/vignettes/xgboost.bib index f69866f04..f21bdae16 100644 --- a/R-package/vignettes/xgboost.bib +++ b/R-package/vignettes/xgboost.bib @@ -18,3 +18,13 @@ publisher={Institute of Mathematical Statistics} } + +@misc{ + Bache+Lichman:2013 , + author = "K. Bache and M. 
Lichman", + year = "2013", + title = "{UCI} Machine Learning Repository", + url = "http://archive.ics.uci.edu/ml", + institution = "University of California, Irvine, School of Information and Computer Sciences" +} +