From d27bfb61b07bbe234bce9f7718f1b42420865d12 Mon Sep 17 00:00:00 2001 From: Vadim Khotilovich Date: Sun, 27 Mar 2016 19:17:13 -0500 Subject: [PATCH] consolidated DMatrix&Booster stuff into xgb.DMatrix.R & xgb.Booster.R --- R-package/R/getinfo.xgb.DMatrix.R | 55 ------------- R-package/R/nrow.xgb.DMatrix.R | 19 ----- R-package/R/predict.xgb.Booster.R | 80 ------------------- R-package/R/predict.xgb.Booster.handle.R | 18 ----- R-package/R/setinfo.xgb.DMatrix.R | 37 --------- R-package/R/slice.xgb.DMatrix.R | 44 ---------- R-package/man/nrow-xgb.DMatrix-method.Rd | 23 ------ R-package/man/predict-xgb.Booster-method.Rd | 53 ------------ .../man/predict-xgb.Booster.handle-method.Rd | 18 ----- R-package/man/slice.Rd | 31 ------- 10 files changed, 378 deletions(-) delete mode 100644 R-package/R/getinfo.xgb.DMatrix.R delete mode 100644 R-package/R/nrow.xgb.DMatrix.R delete mode 100644 R-package/R/predict.xgb.Booster.R delete mode 100644 R-package/R/predict.xgb.Booster.handle.R delete mode 100644 R-package/R/setinfo.xgb.DMatrix.R delete mode 100644 R-package/R/slice.xgb.DMatrix.R delete mode 100644 R-package/man/nrow-xgb.DMatrix-method.Rd delete mode 100644 R-package/man/predict-xgb.Booster-method.Rd delete mode 100644 R-package/man/predict-xgb.Booster.handle-method.Rd delete mode 100644 R-package/man/slice.Rd diff --git a/R-package/R/getinfo.xgb.DMatrix.R b/R-package/R/getinfo.xgb.DMatrix.R deleted file mode 100644 index 3000a1e7d..000000000 --- a/R-package/R/getinfo.xgb.DMatrix.R +++ /dev/null @@ -1,55 +0,0 @@ -setClass('xgb.DMatrix') - -#' Get information of an xgb.DMatrix object -#' -#' Get information of an xgb.DMatrix object -#' -#' The information can be one of the following: -#' -#' \itemize{ -#' \item \code{label}: label Xgboost learn from ; -#' \item \code{weight}: to do a weight rescale ; -#' \item \code{base_margin}: base margin is the base prediction Xgboost will boost from ; -#' \item \code{nrow}: number of rows of the \code{xgb.DMatrix}. -#' } -#' -#' @examples -#' data(agaricus.train, package='xgboost') -#' train <- agaricus.train -#' dtrain <- xgb.DMatrix(train$data, label=train$label) -#' labels <- getinfo(dtrain, 'label') -#' setinfo(dtrain, 'label', 1-labels) -#' labels2 <- getinfo(dtrain, 'label') -#' stopifnot(all(labels2 == 1-labels)) -#' @rdname getinfo -#' @export -getinfo <- function(object, ...){ - UseMethod("getinfo") -} - - - -#' @param object Object of class \code{xgb.DMatrix} -#' @param name the name of the field to get -#' @param ... other parameters -#' @rdname getinfo -#' @method getinfo xgb.DMatrix -setMethod("getinfo", signature = "xgb.DMatrix", - definition = function(object, name) { - if (typeof(name) != "character") { - stop("xgb.getinfo: name must be character") - } - if (class(object) != "xgb.DMatrix") { - stop("xgb.setinfo: first argument dtrain must be xgb.DMatrix") - } - if (name != "label" && name != "weight" && - name != "base_margin" && name != "nrow") { - stop(paste("xgb.getinfo: unknown info name", name)) - } - if (name != "nrow"){ - ret <- .Call("XGDMatrixGetInfo_R", object, name, PACKAGE = "xgboost") - } else { - ret <- xgb.numrow(object) - } - return(ret) - }) diff --git a/R-package/R/nrow.xgb.DMatrix.R b/R-package/R/nrow.xgb.DMatrix.R deleted file mode 100644 index 9ea039764..000000000 --- a/R-package/R/nrow.xgb.DMatrix.R +++ /dev/null @@ -1,19 +0,0 @@ -setGeneric("nrow") - -#' @title Number of xgb.DMatrix rows -#' @description \code{nrow} return the number of rows present in the \code{xgb.DMatrix}. -#' @param x Object of class \code{xgb.DMatrix} -#' -#' @examples -#' data(agaricus.train, package='xgboost') -#' train <- agaricus.train -#' dtrain <- xgb.DMatrix(train$data, label=train$label) -#' stopifnot(nrow(dtrain) == nrow(train$data)) -#' -#' @export -setMethod("nrow", - signature = "xgb.DMatrix", - definition = function(x) { - xgb.numrow(x) - } -) diff --git a/R-package/R/predict.xgb.Booster.R b/R-package/R/predict.xgb.Booster.R deleted file mode 100644 index d608f3465..000000000 --- a/R-package/R/predict.xgb.Booster.R +++ /dev/null @@ -1,80 +0,0 @@ -setClass("xgb.Booster.handle") -setClass("xgb.Booster", - slots = c(handle = "xgb.Booster.handle", - raw = "raw")) - -#' Predict method for eXtreme Gradient Boosting model -#' -#' Predicted values based on xgboost model object. -#' -#' @param object Object of class "xgb.Boost" -#' @param newdata takes \code{matrix}, \code{dgCMatrix}, local data file or -#' \code{xgb.DMatrix}. -#' @param missing Missing is only used when input is dense matrix, pick a float -#' value that represents missing value. Sometime a data use 0 or other extreme value to represents missing values. -#' @param outputmargin whether the prediction should be shown in the original -#' value of sum of functions, when outputmargin=TRUE, the prediction is -#' untransformed margin value. In logistic regression, outputmargin=T will -#' output value before logistic transformation. -#' @param ntreelimit limit number of trees used in prediction, this parameter is -#' only valid for gbtree, but not for gblinear. set it to be value bigger -#' than 0. It will use all trees by default. -#' @param predleaf whether predict leaf index instead. If set to TRUE, the output will be a matrix object. -#' -#' @details -#' The option \code{ntreelimit} purpose is to let the user train a model with lots -#' of trees but use only the first trees for prediction to avoid overfitting -#' (without having to train a new model with less trees). -#' -#' The option \code{predleaf} purpose is inspired from §3.1 of the paper -#' \code{Practical Lessons from Predicting Clicks on Ads at Facebook}. -#' The idea is to use the model as a generator of new features which capture non linear link -#' from original features. -#' -#' @examples -#' data(agaricus.train, package='xgboost') -#' data(agaricus.test, package='xgboost') -#' train <- agaricus.train -#' test <- agaricus.test -#' bst <- xgboost(data = train$data, label = train$label, max.depth = 2, -#' eta = 1, nthread = 2, nround = 2,objective = "binary:logistic") -#' pred <- predict(bst, test$data) -#' @export -setMethod("predict", signature = "xgb.Booster", - definition = function(object, newdata, missing = NA, - outputmargin = FALSE, ntreelimit = NULL, predleaf = FALSE) { - if (class(object) != "xgb.Booster"){ - stop("predict: model in prediction must be of class xgb.Booster") - } else { - object <- xgb.Booster.check(object, saveraw = FALSE) - } - if (class(newdata) != "xgb.DMatrix") { - newdata <- xgb.DMatrix(newdata, missing = missing) - } - if (is.null(ntreelimit)) { - ntreelimit <- 0 - } else { - if (ntreelimit < 1){ - stop("predict: ntreelimit must be equal to or greater than 1") - } - } - option <- 0 - if (outputmargin) { - option <- option + 1 - } - if (predleaf) { - option <- option + 2 - } - ret <- .Call("XGBoosterPredict_R", object$handle, newdata, as.integer(option), - as.integer(ntreelimit), PACKAGE = "xgboost") - if (predleaf){ - len <- getinfo(newdata, "nrow") - if (length(ret) == len){ - ret <- matrix(ret,ncol = 1) - } else { - ret <- matrix(ret, ncol = len) - ret <- t(ret) - } - } - return(ret) -}) diff --git a/R-package/R/predict.xgb.Booster.handle.R b/R-package/R/predict.xgb.Booster.handle.R deleted file mode 100644 index 3e4013b75..000000000 --- a/R-package/R/predict.xgb.Booster.handle.R +++ /dev/null @@ -1,18 +0,0 @@ -#' Predict method for eXtreme Gradient Boosting model handle -#' -#' Predicted values based on xgb.Booster.handle object. -#' -#' @param object Object of class "xgb.Boost.handle" -#' @param ... Parameters pass to \code{predict.xgb.Booster} -#' -setMethod("predict", signature = "xgb.Booster.handle", - definition = function(object, ...) { - if (class(object) != "xgb.Booster.handle"){ - stop("predict: model in prediction must be of class xgb.Booster.handle") - } - - bst <- xgb.handleToBooster(object) - - ret <- predict(bst, ...) - return(ret) -}) diff --git a/R-package/R/setinfo.xgb.DMatrix.R b/R-package/R/setinfo.xgb.DMatrix.R deleted file mode 100644 index 427de08d4..000000000 --- a/R-package/R/setinfo.xgb.DMatrix.R +++ /dev/null @@ -1,37 +0,0 @@ -#' Set information of an xgb.DMatrix object -#' -#' Set information of an xgb.DMatrix object -#' -#' It can be one of the following: -#' -#' \itemize{ -#' \item \code{label}: label Xgboost learn from ; -#' \item \code{weight}: to do a weight rescale ; -#' \item \code{base_margin}: base margin is the base prediction Xgboost will boost from ; -#' \item \code{group}. -#' } -#' -#' @examples -#' data(agaricus.train, package='xgboost') -#' train <- agaricus.train -#' dtrain <- xgb.DMatrix(train$data, label=train$label) -#' labels <- getinfo(dtrain, 'label') -#' setinfo(dtrain, 'label', 1-labels) -#' labels2 <- getinfo(dtrain, 'label') -#' stopifnot(all(labels2 == 1-labels)) -#' @rdname setinfo -#' @export -setinfo <- function(object, ...){ - UseMethod("setinfo") -} - -#' @param object Object of class "xgb.DMatrix" -#' @param name the name of the field to get -#' @param info the specific field of information to set -#' @param ... other parameters -#' @rdname setinfo -#' @method setinfo xgb.DMatrix -setMethod("setinfo", signature = "xgb.DMatrix", - definition = function(object, name, info) { - xgb.setinfo(object, name, info) - }) diff --git a/R-package/R/slice.xgb.DMatrix.R b/R-package/R/slice.xgb.DMatrix.R deleted file mode 100644 index 4626c2b4d..000000000 --- a/R-package/R/slice.xgb.DMatrix.R +++ /dev/null @@ -1,44 +0,0 @@ -setClass('xgb.DMatrix') - -#' Get a new DMatrix containing the specified rows of -#' orginal xgb.DMatrix object -#' -#' Get a new DMatrix containing the specified rows of -#' orginal xgb.DMatrix object -#' -#' @examples -#' data(agaricus.train, package='xgboost') -#' train <- agaricus.train -#' dtrain <- xgb.DMatrix(train$data, label=train$label) -#' dsub <- slice(dtrain, 1:3) -#' @rdname slice -#' @export -slice <- function(object, ...){ - UseMethod("slice") -} - -#' @param object Object of class "xgb.DMatrix" -#' @param idxset a integer vector of indices of rows needed -#' @param ... other parameters -#' @rdname slice -#' @method slice xgb.DMatrix -setMethod("slice", signature = "xgb.DMatrix", - definition = function(object, idxset, ...) { - if (class(object) != "xgb.DMatrix") { - stop("slice: first argument dtrain must be xgb.DMatrix") - } - ret <- .Call("XGDMatrixSliceDMatrix_R", object, idxset, - PACKAGE = "xgboost") - - attr_list <- attributes(object) - nr <- xgb.numrow(object) - len <- sapply(attr_list,length) - ind <- which(len == nr) - if (length(ind) > 0) { - nms <- names(attr_list)[ind] - for (i in 1:length(ind)) { - attr(ret,nms[i]) <- attr(object,nms[i])[idxset] - } - } - return(structure(ret, class = "xgb.DMatrix")) - }) diff --git a/R-package/man/nrow-xgb.DMatrix-method.Rd b/R-package/man/nrow-xgb.DMatrix-method.Rd deleted file mode 100644 index 1fd52b9c1..000000000 --- a/R-package/man/nrow-xgb.DMatrix-method.Rd +++ /dev/null @@ -1,23 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/nrow.xgb.DMatrix.R -\docType{methods} -\name{nrow,xgb.DMatrix-method} -\alias{nrow,xgb.DMatrix-method} -\title{Number of xgb.DMatrix rows} -\usage{ -\S4method{nrow}{xgb.DMatrix}(x) -} -\arguments{ -\item{x}{Object of class \code{xgb.DMatrix}} -} -\description{ -\code{nrow} return the number of rows present in the \code{xgb.DMatrix}. -} -\examples{ -data(agaricus.train, package='xgboost') -train <- agaricus.train -dtrain <- xgb.DMatrix(train$data, label=train$label) -stopifnot(nrow(dtrain) == nrow(train$data)) - -} - diff --git a/R-package/man/predict-xgb.Booster-method.Rd b/R-package/man/predict-xgb.Booster-method.Rd deleted file mode 100644 index 341ced8c6..000000000 --- a/R-package/man/predict-xgb.Booster-method.Rd +++ /dev/null @@ -1,53 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/predict.xgb.Booster.R -\docType{methods} -\name{predict,xgb.Booster-method} -\alias{predict,xgb.Booster-method} -\title{Predict method for eXtreme Gradient Boosting model} -\usage{ -\S4method{predict}{xgb.Booster}(object, newdata, missing = NA, - outputmargin = FALSE, ntreelimit = NULL, predleaf = FALSE) -} -\arguments{ -\item{object}{Object of class "xgb.Boost"} - -\item{newdata}{takes \code{matrix}, \code{dgCMatrix}, local data file or -\code{xgb.DMatrix}.} - -\item{missing}{Missing is only used when input is dense matrix, pick a float -value that represents missing value. Sometime a data use 0 or other extreme value to represents missing values.} - -\item{outputmargin}{whether the prediction should be shown in the original -value of sum of functions, when outputmargin=TRUE, the prediction is -untransformed margin value. In logistic regression, outputmargin=T will -output value before logistic transformation.} - -\item{ntreelimit}{limit number of trees used in prediction, this parameter is -only valid for gbtree, but not for gblinear. set it to be value bigger -than 0. It will use all trees by default.} - -\item{predleaf}{whether predict leaf index instead. If set to TRUE, the output will be a matrix object.} -} -\description{ -Predicted values based on xgboost model object. -} -\details{ -The option \code{ntreelimit} purpose is to let the user train a model with lots -of trees but use only the first trees for prediction to avoid overfitting -(without having to train a new model with less trees). - -The option \code{predleaf} purpose is inspired from §3.1 of the paper -\code{Practical Lessons from Predicting Clicks on Ads at Facebook}. -The idea is to use the model as a generator of new features which capture non linear link -from original features. -} -\examples{ -data(agaricus.train, package='xgboost') -data(agaricus.test, package='xgboost') -train <- agaricus.train -test <- agaricus.test -bst <- xgboost(data = train$data, label = train$label, max.depth = 2, - eta = 1, nthread = 2, nround = 2,objective = "binary:logistic") -pred <- predict(bst, test$data) -} - diff --git a/R-package/man/predict-xgb.Booster.handle-method.Rd b/R-package/man/predict-xgb.Booster.handle-method.Rd deleted file mode 100644 index 34454e555..000000000 --- a/R-package/man/predict-xgb.Booster.handle-method.Rd +++ /dev/null @@ -1,18 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/predict.xgb.Booster.handle.R -\docType{methods} -\name{predict,xgb.Booster.handle-method} -\alias{predict,xgb.Booster.handle-method} -\title{Predict method for eXtreme Gradient Boosting model handle} -\usage{ -\S4method{predict}{xgb.Booster.handle}(object, ...) -} -\arguments{ -\item{object}{Object of class "xgb.Boost.handle"} - -\item{...}{Parameters pass to \code{predict.xgb.Booster}} -} -\description{ -Predicted values based on xgb.Booster.handle object. -} - diff --git a/R-package/man/slice.Rd b/R-package/man/slice.Rd deleted file mode 100644 index b17722115..000000000 --- a/R-package/man/slice.Rd +++ /dev/null @@ -1,31 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/slice.xgb.DMatrix.R -\docType{methods} -\name{slice} -\alias{slice} -\alias{slice,xgb.DMatrix-method} -\title{Get a new DMatrix containing the specified rows of -orginal xgb.DMatrix object} -\usage{ -slice(object, ...) - -\S4method{slice}{xgb.DMatrix}(object, idxset, ...) -} -\arguments{ -\item{object}{Object of class "xgb.DMatrix"} - -\item{...}{other parameters} - -\item{idxset}{a integer vector of indices of rows needed} -} -\description{ -Get a new DMatrix containing the specified rows of -orginal xgb.DMatrix object -} -\examples{ -data(agaricus.train, package='xgboost') -train <- agaricus.train -dtrain <- xgb.DMatrix(train$data, label=train$label) -dsub <- slice(dtrain, 1:3) -} -