Merge remote-tracking branch 'upstream/master'

This commit is contained in:
El Potaeto
2015-01-28 10:13:58 +01:00
35 changed files with 117 additions and 68 deletions

View File

@@ -1,4 +1,4 @@
# Generated by roxygen2 (4.1.0): do not edit by hand
# Generated by roxygen2 (4.0.1): do not edit by hand
export(getinfo)
export(setinfo)

View File

@@ -7,8 +7,8 @@ setClass("xgb.Booster")
#' @param object Object of class "xgb.Boost"
#' @param newdata takes \code{matrix}, \code{dgCMatrix}, local data file or
#' \code{xgb.DMatrix}.
#' @param missing Missing is only used when input is dense matrix, pick a float
# value that represents missing value. Sometime a data use 0 or other extreme value to represents missing values.
#' @param missing Missing is only used when input is dense matrix, pick a float
#' value that represents missing value. Sometime a data use 0 or other extreme value to represents missing values.
#' @param outputmargin whether the prediction should be shown in the original
#' value of sum of functions, when outputmargin=TRUE, the prediction is
#' untransformed margin value. In logistic regression, outputmargin=T will

View File

@@ -28,6 +28,18 @@ setMethod("slice", signature = "xgb.DMatrix",
if (class(object) != "xgb.DMatrix") {
stop("slice: first argument dtrain must be xgb.DMatrix")
}
ret <- .Call("XGDMatrixSliceDMatrix_R", object, idxset, PACKAGE = "xgboost")
ret <- .Call("XGDMatrixSliceDMatrix_R", object, idxset,
PACKAGE = "xgboost")
attr_list <- attributes(object)
nr <- xgb.numrow(object)
len <- sapply(attr_list,length)
ind <- which(len==nr)
if (length(ind)>0) {
nms <- names(attr_list)[ind]
for (i in 1:length(ind)) {
attr(ret,nms[i]) <- attr(object,nms[i])[idxset]
}
}
return(structure(ret, class = "xgb.DMatrix"))
})

View File

@@ -6,7 +6,7 @@
#' indicating the data file.
#' @param info a list of information of the xgb.DMatrix object
#' @param missing Missing is only used when input is dense matrix, pick a float
# value that represents missing value. Sometime a data use 0 or other extreme value to represents missing values.
#' value that represents missing value. Sometime a data use 0 or other extreme value to represents missing values.
#
#' @param ... other information to pass to \code{info}.
#'

View File

@@ -32,7 +32,7 @@
#' @param nfold number of folds used
#' @param label option field, when data is Matrix
#' @param missing Missing is only used when input is dense matrix, pick a float
# value that represents missing value. Sometime a data use 0 or other extreme value to represents missing values.
#' value that represents missing value. Sometime a data use 0 or other extreme value to represents missing values.
#' @param prediction A logical value indicating whether to return the prediction vector.
#' @param showsd \code{boolean}, whether show standard deviation of cross validation
#' @param metrics, list of evaluation metrics to be used in corss validation,
@@ -134,4 +134,4 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing =
# Avoid error messages during CRAN check.
# The reason is that these variables are never declared
# They are mainly column names inferred by Data.table...
globalVariables(".")
globalVariables(".")

View File

@@ -5,7 +5,7 @@
#' @param data takes \code{matrix}, \code{dgCMatrix}, local data file or
#' \code{xgb.DMatrix}.
#' @param label the response variable. User should not set this field,
# if data is local data file or \code{xgb.DMatrix}.
#' if data is local data file or \code{xgb.DMatrix}.
#' @param params the list of parameters. Commonly used ones are:
#' \itemize{
#' \item \code{objective} objective function, common ones are
@@ -24,8 +24,8 @@
#' @param verbose If 0, xgboost will stay silent. If 1, xgboost will print
#' information of performance. If 2, xgboost will print information of both
#' performance and construction progress information
#' @param missing Missing is only used when input is dense matrix, pick a float
# value that represents missing value. Sometime a data use 0 or other extreme value to represents missing values.
#' @param missing Missing is only used when input is dense matrix, pick a float
#' value that represents missing value. Sometime a data use 0 or other extreme value to represents missing values.
#' @param ... other parameters to pass to \code{params}.
#'
#' @details

View File

@@ -1,5 +1,4 @@
% Generated by roxygen2 (4.1.0): do not edit by hand
% Please edit documentation in R/xgboost.R
% Generated by roxygen2 (4.0.1): do not edit by hand
\docType{data}
\name{agaricus.test}
\alias{agaricus.test}

View File

@@ -1,5 +1,4 @@
% Generated by roxygen2 (4.1.0): do not edit by hand
% Please edit documentation in R/xgboost.R
% Generated by roxygen2 (4.0.1): do not edit by hand
\docType{data}
\name{agaricus.train}
\alias{agaricus.train}

View File

@@ -1,5 +1,4 @@
% Generated by roxygen2 (4.1.0): do not edit by hand
% Please edit documentation in R/getinfo.xgb.DMatrix.R
% Generated by roxygen2 (4.0.1): do not edit by hand
\docType{methods}
\name{getinfo}
\alias{getinfo}
@@ -13,9 +12,9 @@ getinfo(object, ...)
\arguments{
\item{object}{Object of class "xgb.DMatrix"}
\item{...}{other parameters}
\item{name}{the name of the field to get}
\item{...}{other parameters}
}
\description{
Get information of an xgb.DMatrix object

View File

@@ -1,5 +1,4 @@
% Generated by roxygen2 (4.1.0): do not edit by hand
% Please edit documentation in R/predict.xgb.Booster.R
% Generated by roxygen2 (4.0.1): do not edit by hand
\docType{methods}
\name{predict,xgb.Booster-method}
\alias{predict,xgb.Booster-method}
@@ -14,7 +13,8 @@
\item{newdata}{takes \code{matrix}, \code{dgCMatrix}, local data file or
\code{xgb.DMatrix}.}
\item{missing}{Missing is only used when input is dense matrix, pick a float}
\item{missing}{Missing is only used when input is dense matrix, pick a float
value that represents missing value. Sometime a data use 0 or other extreme value to represents missing values.}
\item{outputmargin}{whether the prediction should be shown in the original
value of sum of functions, when outputmargin=TRUE, the prediction is

View File

@@ -1,5 +1,4 @@
% Generated by roxygen2 (4.1.0): do not edit by hand
% Please edit documentation in R/setinfo.xgb.DMatrix.R
% Generated by roxygen2 (4.0.1): do not edit by hand
\docType{methods}
\name{setinfo}
\alias{setinfo}
@@ -13,11 +12,11 @@ setinfo(object, ...)
\arguments{
\item{object}{Object of class "xgb.DMatrix"}
\item{...}{other parameters}
\item{name}{the name of the field to get}
\item{info}{the specific field of information to set}
\item{...}{other parameters}
}
\description{
Set information of an xgb.DMatrix object

View File

@@ -1,5 +1,4 @@
% Generated by roxygen2 (4.1.0): do not edit by hand
% Please edit documentation in R/slice.xgb.DMatrix.R
% Generated by roxygen2 (4.0.1): do not edit by hand
\docType{methods}
\name{slice}
\alias{slice}
@@ -14,9 +13,9 @@ slice(object, ...)
\arguments{
\item{object}{Object of class "xgb.DMatrix"}
\item{...}{other parameters}
\item{idxset}{a integer vector of indices of rows needed}
\item{...}{other parameters}
}
\description{
Get a new DMatrix containing the specified rows of

View File

@@ -1,5 +1,4 @@
% Generated by roxygen2 (4.1.0): do not edit by hand
% Please edit documentation in R/xgb.DMatrix.R
% Generated by roxygen2 (4.0.1): do not edit by hand
\name{xgb.DMatrix}
\alias{xgb.DMatrix}
\title{Contruct xgb.DMatrix object}
@@ -12,7 +11,8 @@ indicating the data file.}
\item{info}{a list of information of the xgb.DMatrix object}
\item{missing}{Missing is only used when input is dense matrix, pick a float}
\item{missing}{Missing is only used when input is dense matrix, pick a float
value that represents missing value. Sometime a data use 0 or other extreme value to represents missing values.}
\item{...}{other information to pass to \code{info}.}
}

View File

@@ -1,5 +1,4 @@
% Generated by roxygen2 (4.1.0): do not edit by hand
% Please edit documentation in R/xgb.DMatrix.save.R
% Generated by roxygen2 (4.0.1): do not edit by hand
\name{xgb.DMatrix.save}
\alias{xgb.DMatrix.save}
\title{Save xgb.DMatrix object to binary file}

View File

@@ -1,5 +1,4 @@
% Generated by roxygen2 (4.1.0): do not edit by hand
% Please edit documentation in R/xgb.cv.R
% Generated by roxygen2 (4.0.1): do not edit by hand
\name{xgb.cv}
\alias{xgb.cv}
\title{Cross Validation}
@@ -32,7 +31,8 @@ xgb.cv(params = list(), data, nrounds, nfold, label = NULL,
\item{label}{option field, when data is Matrix}
\item{missing}{Missing is only used when input is dense matrix, pick a float}
\item{missing}{Missing is only used when input is dense matrix, pick a float
value that represents missing value. Sometime a data use 0 or other extreme value to represents missing values.}
\item{prediction}{A logical value indicating whether to return the prediction vector.}

View File

@@ -1,5 +1,4 @@
% Generated by roxygen2 (4.1.0): do not edit by hand
% Please edit documentation in R/xgb.dump.R
% Generated by roxygen2 (4.0.1): do not edit by hand
\name{xgb.dump}
\alias{xgb.dump}
\title{Save xgboost model to text file}

View File

@@ -1,5 +1,4 @@
% Generated by roxygen2 (4.1.0): do not edit by hand
% Please edit documentation in R/xgb.importance.R
% Generated by roxygen2 (4.0.1): do not edit by hand
\name{xgb.importance}
\alias{xgb.importance}
\title{Show importance of features in a model}

View File

@@ -1,5 +1,4 @@
% Generated by roxygen2 (4.1.0): do not edit by hand
% Please edit documentation in R/xgb.load.R
% Generated by roxygen2 (4.0.1): do not edit by hand
\name{xgb.load}
\alias{xgb.load}
\title{Load xgboost model from binary file}

View File

@@ -1,5 +1,4 @@
% Generated by roxygen2 (4.1.0): do not edit by hand
% Please edit documentation in R/xgb.model.dt.tree.R
% Generated by roxygen2 (4.0.1): do not edit by hand
\name{xgb.model.dt.tree}
\alias{xgb.model.dt.tree}
\title{Convert tree model dump to data.table}

View File

@@ -1,5 +1,4 @@
% Generated by roxygen2 (4.1.0): do not edit by hand
% Please edit documentation in R/xgb.plot.tree.R
% Generated by roxygen2 (4.0.1): do not edit by hand
\name{xgb.plot.tree}
\alias{xgb.plot.tree}
\title{Plot a boosted tree model}

View File

@@ -1,5 +1,4 @@
% Generated by roxygen2 (4.1.0): do not edit by hand
% Please edit documentation in R/xgb.save.R
% Generated by roxygen2 (4.0.1): do not edit by hand
\name{xgb.save}
\alias{xgb.save}
\title{Save xgboost model to binary file}

View File

@@ -1,5 +1,4 @@
% Generated by roxygen2 (4.1.0): do not edit by hand
% Please edit documentation in R/xgb.train.R
% Generated by roxygen2 (4.0.1): do not edit by hand
\name{xgb.train}
\alias{xgb.train}
\title{eXtreme Gradient Boosting Training}

View File

@@ -1,5 +1,4 @@
% Generated by roxygen2 (4.1.0): do not edit by hand
% Please edit documentation in R/xgboost.R
% Generated by roxygen2 (4.0.1): do not edit by hand
\name{xgboost}
\alias{xgboost}
\title{eXtreme Gradient Boosting (Tree) library}
@@ -11,9 +10,8 @@ xgboost(data = NULL, label = NULL, missing = NULL, params = list(),
\item{data}{takes \code{matrix}, \code{dgCMatrix}, local data file or
\code{xgb.DMatrix}.}
\item{label}{the response variable. User should not set this field,}
\item{missing}{Missing is only used when input is dense matrix, pick a float}
\item{label}{the response variable. User should not set this field,
if data is local data file or \code{xgb.DMatrix}.}
\item{params}{the list of parameters. Commonly used ones are:
\itemize{
@@ -36,6 +34,9 @@ xgboost(data = NULL, label = NULL, missing = NULL, params = list(),
information of performance. If 2, xgboost will print information of both
performance and construction progress information}
\item{missing}{Missing is only used when input is dense matrix, pick a float
value that represents missing value. Sometime a data use 0 or other extreme value to represents missing values.}
\item{...}{other parameters to pass to \code{params}.}
}
\description{

View File

@@ -71,13 +71,13 @@ extern "C" {
SEXP missing) {
_WrapperBegin();
SEXP dim = getAttrib(mat, R_DimSymbol);
int nrow = INTEGER(dim)[0];
int ncol = INTEGER(dim)[1];
bst_ulong nrow = static_cast<bst_ulong>(INTEGER(dim)[0]);
bst_ulong ncol = static_cast<bst_ulong>(INTEGER(dim)[1]);
double *din = REAL(mat);
std::vector<float> data(nrow * ncol);
#pragma omp parallel for schedule(static)
for (int i = 0; i < nrow; ++i) {
for (int j = 0; j < ncol; ++j) {
for (bst_omp_uint i = 0; i < nrow; ++i) {
for (bst_ulong j = 0; j < ncol; ++j) {
data[i * ncol +j] = din[i + nrow * j];
}
}