diff --git a/R-package/R/utils.R b/R-package/R/utils.R index e58601df8..732ef0d11 100644 --- a/R-package/R/utils.R +++ b/R-package/R/utils.R @@ -103,17 +103,20 @@ xgb.Booster.check <- function(bst, saveraw = TRUE) ## ----the following are low level iteratively function, not needed if ## you do not want to use them --------------------------------------- -# get dmatrix from data, label +# get an xgb.DMatrix from data; label, missing and weight are applied only for matrix input -xgb.get.DMatrix <- function(data, label = NULL, missing = NULL) { +xgb.get.DMatrix <- function(data, label = NULL, missing = NULL, weight = NULL) { inClass <- class(data) if (inClass == "dgCMatrix" || inClass == "matrix") { if (is.null(label)) { stop("xgboost: need label when data is a matrix") } if (is.null(missing)){ dtrain <- xgb.DMatrix(data, label = label) } else { dtrain <- xgb.DMatrix(data, label = label, missing = missing) } + if (!is.null(weight)){ + xgb.setinfo(dtrain, "weight", weight) + } } else { if (!is.null(label)) { warning("xgboost: label will be ignored.") @@ -122,6 +125,9 @@ xgb.get.DMatrix <- function(data, label = NULL, missing = NULL) { dtrain <- xgb.DMatrix(data) } else if (inClass == "xgb.DMatrix") { dtrain <- data + } else if (inClass == "data.frame") { + stop("xgboost only support numerical matrix input, + use 'data.matrix' to transform the data.") } else { stop("xgboost: Invalid input of data") } diff --git a/R-package/R/xgboost.R b/R-package/R/xgboost.R index c96a7d89e..164dc1838 100644 --- a/R-package/R/xgboost.R +++ b/R-package/R/xgboost.R @@ -31,6 +31,7 @@ #' @param print.every.n Print every N progress messages when \code{verbose>0}. Default is 1 which means all messages are printed. #' @param missing Missing is only used when input is dense matrix, pick a float #' value that represents missing value. Sometimes a data use 0 or other extreme value to represents missing values. +#' @param weight a vector indicating the weight for each row of the input. 
#' @param early.stop.round If \code{NULL}, the early stopping function is not triggered. #' If set to an integer \code{k}, training with a validation set will stop if the performance #' keeps getting worse consecutively for \code{k} rounds. @@ -58,14 +59,11 @@ #' #' @export #' -xgboost <- function(data = NULL, label = NULL, missing = NULL, params = list(), nrounds, +xgboost <- function(data = NULL, label = NULL, missing = NULL, weight = NULL, + params = list(), nrounds, verbose = 1, print.every.n = 1L, early.stop.round = NULL, maximize = NULL, save_period = 0, save_name = "xgboost.model", ...) { - if (is.null(missing)) { - dtrain <- xgb.get.DMatrix(data, label) - } else { - dtrain <- xgb.get.DMatrix(data, label, missing) - } + dtrain <- xgb.get.DMatrix(data, label, missing, weight) params <- append(params, list(...)) diff --git a/R-package/man/xgboost.Rd b/R-package/man/xgboost.Rd index 5dfeeacee..a05560a19 100644 --- a/R-package/man/xgboost.Rd +++ b/R-package/man/xgboost.Rd @@ -4,9 +4,10 @@ \alias{xgboost} \title{eXtreme Gradient Boosting (Tree) library} \usage{ -xgboost(data = NULL, label = NULL, missing = NULL, params = list(), - nrounds, verbose = 1, print.every.n = 1L, early.stop.round = NULL, - maximize = NULL, save_period = 0, save_name = "xgboost.model", ...) +xgboost(data = NULL, label = NULL, missing = NULL, weight = NULL, + params = list(), nrounds, verbose = 1, print.every.n = 1L, + early.stop.round = NULL, maximize = NULL, save_period = 0, + save_name = "xgboost.model", ...) } \arguments{ \item{data}{takes \code{matrix}, \code{dgCMatrix}, local data file or @@ -18,6 +19,8 @@ if data is local data file or \code{xgb.DMatrix}.} \item{missing}{Missing is only used when input is dense matrix, pick a float value that represents missing value. Sometimes a data use 0 or other extreme value to represents missing values.} +\item{weight}{a vector indicating the weight for each row of the input.} + \item{params}{the list of parameters. Commonly used ones are: