add error for data.frame, add weight to xgboost

This commit is contained in:
unknown 2015-09-02 21:43:23 -07:00
parent baa3145817
commit 3d6c831e8a
3 changed files with 18 additions and 10 deletions

View File

@ -103,17 +103,21 @@ xgb.Booster.check <- function(bst, saveraw = TRUE)
## ----the following are low level iteratively function, not needed if
## you do not want to use them ---------------------------------------
# get dmatrix from data, label
xgb.get.DMatrix <- function(data, label = NULL, missing = NULL) {
xgb.get.DMatrix <- function(data, label = NULL, missing = NULL, weight = NULL) {
inClass <- class(data)
if (inClass == "dgCMatrix" || inClass == "matrix") {
if (is.null(label)) {
stop("xgboost: need label when data is a matrix")
}
dtrain <- xgb.DMatrix(data, label = label)
if (is.null(missing)){
dtrain <- xgb.DMatrix(data, label = label)
} else {
dtrain <- xgb.DMatrix(data, label = label, missing = missing)
}
if (!is.null(weight)){
xgb.setinfo(dtrain, "weight", weight)
}
} else {
if (!is.null(label)) {
warning("xgboost: label will be ignored.")
@ -122,6 +126,9 @@ xgb.get.DMatrix <- function(data, label = NULL, missing = NULL) {
dtrain <- xgb.DMatrix(data)
} else if (inClass == "xgb.DMatrix") {
dtrain <- data
} else if (inClass == "data.frame") {
stop("xgboost only support numerical matrix input,
use 'data.frame' to transform the data.")
} else {
stop("xgboost: Invalid input of data")
}

View File

@ -31,6 +31,7 @@
#' @param print.every.n Print every N progress messages when \code{verbose>0}. Default is 1 which means all messages are printed.
#' @param missing Missing is only used when input is dense matrix, pick a float
#' value that represents missing value. Sometimes a data use 0 or other extreme value to represents missing values.
#' @param weight a vector indicating the weight for each row of the input.
#' @param early.stop.round If \code{NULL}, the early stopping function is not triggered.
#' If set to an integer \code{k}, training with a validation set will stop if the performance
#' keeps getting worse consecutively for \code{k} rounds.
@ -58,14 +59,11 @@
#'
#' @export
#'
xgboost <- function(data = NULL, label = NULL, missing = NULL, params = list(), nrounds,
xgboost <- function(data = NULL, label = NULL, missing = NULL, weight = NULL,
params = list(), nrounds,
verbose = 1, print.every.n = 1L, early.stop.round = NULL,
maximize = NULL, save_period = 0, save_name = "xgboost.model", ...) {
if (is.null(missing)) {
dtrain <- xgb.get.DMatrix(data, label)
} else {
dtrain <- xgb.get.DMatrix(data, label, missing)
}
dtrain <- xgb.get.DMatrix(data, label, missing, weight)
params <- append(params, list(...))

View File

@ -4,9 +4,10 @@
\alias{xgboost}
\title{eXtreme Gradient Boosting (Tree) library}
\usage{
xgboost(data = NULL, label = NULL, missing = NULL, params = list(),
nrounds, verbose = 1, print.every.n = 1L, early.stop.round = NULL,
maximize = NULL, save_period = 0, save_name = "xgboost.model", ...)
xgboost(data = NULL, label = NULL, missing = NULL, weight = NULL,
params = list(), nrounds, verbose = 1, print.every.n = 1L,
early.stop.round = NULL, maximize = NULL, save_period = 0,
save_name = "xgboost.model", ...)
}
\arguments{
\item{data}{takes \code{matrix}, \code{dgCMatrix}, local data file or
@ -18,6 +19,8 @@ if data is local data file or \code{xgb.DMatrix}.}
\item{missing}{Missing is only used when input is dense matrix, pick a float
value that represents missing value. Sometimes a data use 0 or other extreme value to represents missing values.}
\item{weight}{a vector indicating the weight for each row of the input.}
\item{params}{the list of parameters.
Commonly used ones are: