add error for data.frame, add weight to xgboost
This commit is contained in:
parent
baa3145817
commit
3d6c831e8a
@ -103,17 +103,21 @@ xgb.Booster.check <- function(bst, saveraw = TRUE)
|
|||||||
## ----the following are low-level iterative functions, not needed if
|
## ----the following are low-level iterative functions, not needed if
|
||||||
## you do not want to use them ---------------------------------------
|
## you do not want to use them ---------------------------------------
|
||||||
# get dmatrix from data, label
|
# get dmatrix from data, label
|
||||||
xgb.get.DMatrix <- function(data, label = NULL, missing = NULL) {
|
xgb.get.DMatrix <- function(data, label = NULL, missing = NULL, weight = NULL) {
|
||||||
inClass <- class(data)
|
inClass <- class(data)
|
||||||
if (inClass == "dgCMatrix" || inClass == "matrix") {
|
if (inClass == "dgCMatrix" || inClass == "matrix") {
|
||||||
if (is.null(label)) {
|
if (is.null(label)) {
|
||||||
stop("xgboost: need label when data is a matrix")
|
stop("xgboost: need label when data is a matrix")
|
||||||
}
|
}
|
||||||
|
dtrain <- xgb.DMatrix(data, label = label)
|
||||||
if (is.null(missing)){
|
if (is.null(missing)){
|
||||||
dtrain <- xgb.DMatrix(data, label = label)
|
dtrain <- xgb.DMatrix(data, label = label)
|
||||||
} else {
|
} else {
|
||||||
dtrain <- xgb.DMatrix(data, label = label, missing = missing)
|
dtrain <- xgb.DMatrix(data, label = label, missing = missing)
|
||||||
}
|
}
|
||||||
|
if (!is.null(weight)){
|
||||||
|
xgb.setinfo(dtrain, "weight", weight)
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
if (!is.null(label)) {
|
if (!is.null(label)) {
|
||||||
warning("xgboost: label will be ignored.")
|
warning("xgboost: label will be ignored.")
|
||||||
@ -122,6 +126,9 @@ xgb.get.DMatrix <- function(data, label = NULL, missing = NULL) {
|
|||||||
dtrain <- xgb.DMatrix(data)
|
dtrain <- xgb.DMatrix(data)
|
||||||
} else if (inClass == "xgb.DMatrix") {
|
} else if (inClass == "xgb.DMatrix") {
|
||||||
dtrain <- data
|
dtrain <- data
|
||||||
|
} else if (inClass == "data.frame") {
|
||||||
|
stop("xgboost only support numerical matrix input,
|
||||||
|
use 'data.frame' to transform the data.")
|
||||||
} else {
|
} else {
|
||||||
stop("xgboost: Invalid input of data")
|
stop("xgboost: Invalid input of data")
|
||||||
}
|
}
|
||||||
|
|||||||
@ -31,6 +31,7 @@
|
|||||||
#' @param print.every.n Print every N progress messages when \code{verbose>0}. Default is 1 which means all messages are printed.
|
#' @param print.every.n Print every N progress messages when \code{verbose>0}. Default is 1 which means all messages are printed.
|
||||||
#' @param missing Only used when the input is a dense matrix. Pick a float
|
#' @param missing Only used when the input is a dense matrix. Pick a float
|
||||||
#' value that represents a missing value. Some data sets use 0 or another extreme value to represent missing values.
|
#' value that represents a missing value. Some data sets use 0 or another extreme value to represent missing values.
|
||||||
|
#' @param weight a vector indicating the weight for each row of the input.
|
||||||
#' @param early.stop.round If \code{NULL}, the early stopping function is not triggered.
|
#' @param early.stop.round If \code{NULL}, the early stopping function is not triggered.
|
||||||
#' If set to an integer \code{k}, training with a validation set will stop if the performance
|
#' If set to an integer \code{k}, training with a validation set will stop if the performance
|
||||||
#' keeps getting worse consecutively for \code{k} rounds.
|
#' keeps getting worse consecutively for \code{k} rounds.
|
||||||
@ -58,14 +59,11 @@
|
|||||||
#'
|
#'
|
||||||
#' @export
|
#' @export
|
||||||
#'
|
#'
|
||||||
xgboost <- function(data = NULL, label = NULL, missing = NULL, params = list(), nrounds,
|
xgboost <- function(data = NULL, label = NULL, missing = NULL, weight = NULL,
|
||||||
|
params = list(), nrounds,
|
||||||
verbose = 1, print.every.n = 1L, early.stop.round = NULL,
|
verbose = 1, print.every.n = 1L, early.stop.round = NULL,
|
||||||
maximize = NULL, save_period = 0, save_name = "xgboost.model", ...) {
|
maximize = NULL, save_period = 0, save_name = "xgboost.model", ...) {
|
||||||
if (is.null(missing)) {
|
dtrain <- xgb.get.DMatrix(data, label, missing, weight)
|
||||||
dtrain <- xgb.get.DMatrix(data, label)
|
|
||||||
} else {
|
|
||||||
dtrain <- xgb.get.DMatrix(data, label, missing)
|
|
||||||
}
|
|
||||||
|
|
||||||
params <- append(params, list(...))
|
params <- append(params, list(...))
|
||||||
|
|
||||||
|
|||||||
@ -4,9 +4,10 @@
|
|||||||
\alias{xgboost}
|
\alias{xgboost}
|
||||||
\title{eXtreme Gradient Boosting (Tree) library}
|
\title{eXtreme Gradient Boosting (Tree) library}
|
||||||
\usage{
|
\usage{
|
||||||
xgboost(data = NULL, label = NULL, missing = NULL, params = list(),
|
xgboost(data = NULL, label = NULL, missing = NULL, weight = NULL,
|
||||||
nrounds, verbose = 1, print.every.n = 1L, early.stop.round = NULL,
|
params = list(), nrounds, verbose = 1, print.every.n = 1L,
|
||||||
maximize = NULL, save_period = 0, save_name = "xgboost.model", ...)
|
early.stop.round = NULL, maximize = NULL, save_period = 0,
|
||||||
|
save_name = "xgboost.model", ...)
|
||||||
}
|
}
|
||||||
\arguments{
|
\arguments{
|
||||||
\item{data}{takes \code{matrix}, \code{dgCMatrix}, local data file or
|
\item{data}{takes \code{matrix}, \code{dgCMatrix}, local data file or
|
||||||
@ -18,6 +19,8 @@ if data is local data file or \code{xgb.DMatrix}.}
|
|||||||
\item{missing}{Only used when the input is a dense matrix. Pick a float
|
\item{missing}{Only used when the input is a dense matrix. Pick a float
|
||||||
value that represents a missing value. Some data sets use 0 or another extreme value to represent missing values.}
|
value that represents a missing value. Some data sets use 0 or another extreme value to represent missing values.}
|
||||||
|
|
||||||
|
\item{weight}{a vector indicating the weight for each row of the input.}
|
||||||
|
|
||||||
\item{params}{the list of parameters.
|
\item{params}{the list of parameters.
|
||||||
|
|
||||||
Commonly used ones are:
|
Commonly used ones are:
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user