Merge branch 'master' of ssh://github.com/tqchen/xgboost

tqchen 2014-09-05 20:34:46 -07:00
commit bc1817ca2f
22 changed files with 81 additions and 26 deletions

View File

@@ -6,7 +6,7 @@ setClass('xgb.DMatrix')
 #'
 #' @examples
 #' data(iris)
-#' iris[,5] <- as.numeric(iris[,5])
+#' iris[,5] <- as.numeric(iris[,5]=='setosa')
 #' dtrain <- xgb.DMatrix(as.matrix(iris[,1:4]), label=iris[,5])
 #' labels <- getinfo(dtrain, "label")
 #' @rdname getinfo

View File

@@ -11,11 +11,12 @@ setClass("xgb.Booster")
 #' value of sum of functions, when outputmargin=TRUE, the prediction is
 #' untransformed margin value. In logistic regression, outputmargin=T will
 #' output value before logistic transformation.
-#' @param ntreelimit limit number of trees used in prediction, this parameter is only valid for gbtree, but not for gblinear.
-#' set it to be value bigger than 0. It will use all trees by default.
+#' @param ntreelimit limit number of trees used in prediction, this parameter is
+#' only valid for gbtree, but not for gblinear. set it to be value bigger
+#' than 0. It will use all trees by default.
 #' @examples
 #' data(iris)
-#' bst <- xgboost(as.matrix(iris[,1:4]),as.numeric(iris[,5]), nrounds = 2)
+#' bst <- xgboost(as.matrix(iris[,1:4]),as.numeric(iris[,5]=='setosa'), nrounds = 2)
 #' pred <- predict(bst, as.matrix(iris[,1:4]))
 #' @export
 #'

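A quick sketch of the two prediction options documented above (hedged; it reuses the iris booster 'bst' from the example and assumes a gbtree booster):

margin <- predict(bst, as.matrix(iris[,1:4]), outputmargin = TRUE) # untransformed margin values
pred1  <- predict(bst, as.matrix(iris[,1:4]), ntreelimit = 1)      # use only the first tree
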
View File

@@ -8,7 +8,7 @@ setClass('xgb.DMatrix')
 #'
 #' @examples
 #' data(iris)
-#' iris[,5] <- as.numeric(iris[,5])
+#' iris[,5] <- as.numeric(iris[,5]=='setosa')
 #' dtrain <- xgb.DMatrix(as.matrix(iris[,1:4]), label=iris[,5])
 #' dsub <- slice(dtrain, 1:3)
 #' @rdname slice

View File

@@ -12,7 +12,7 @@
 #'
 #' @examples
 #' data(iris)
-#' iris[,5] <- as.numeric(iris[,5])
+#' iris[,5] <- as.numeric(iris[,5]=='setosa')
 #' dtrain <- xgb.DMatrix(as.matrix(iris[,1:4]), label=iris[,5])
 #' xgb.DMatrix.save(dtrain, 'iris.xgb.DMatrix')
 #' dtrain <- xgb.DMatrix('iris.xgb.DMatrix')

View File

@@ -7,7 +7,7 @@
 #'
 #' @examples
 #' data(iris)
-#' iris[,5] <- as.numeric(iris[,5])
+#' iris[,5] <- as.numeric(iris[,5]=='setosa')
 #' dtrain <- xgb.DMatrix(as.matrix(iris[,1:4]), label=iris[,5])
 #' xgb.DMatrix.save(dtrain, 'iris.xgb.DMatrix')
 #' dtrain <- xgb.DMatrix('iris.xgb.DMatrix')

View File

@@ -13,7 +13,7 @@
 #'
 #' @examples
 #' data(iris)
-#' bst <- xgboost(as.matrix(iris[,1:4]),as.numeric(iris[,5]), nrounds = 2)
+#' bst <- xgboost(as.matrix(iris[,1:4]),as.numeric(iris[,5]=='setosa'), nrounds = 2)
 #' xgb.dump(bst, 'iris.xgb.model.dump')
 #' @export
 #'

View File

@@ -6,7 +6,7 @@
 #'
 #' @examples
 #' data(iris)
-#' bst <- xgboost(as.matrix(iris[,1:4]),as.numeric(iris[,5]), nrounds = 2)
+#' bst <- xgboost(as.matrix(iris[,1:4]),as.numeric(iris[,5]=='setosa'), nrounds = 2)
 #' xgb.save(bst, 'iris.xgb.model')
 #' bst <- xgb.load('iris.xgb.model')
 #' pred <- predict(bst, as.matrix(iris[,1:4]))

View File

@@ -7,7 +7,7 @@
 #'
 #' @examples
 #' data(iris)
-#' bst <- xgboost(as.matrix(iris[,1:4]),as.numeric(iris[,5]), nrounds = 2)
+#' bst <- xgboost(as.matrix(iris[,1:4]),as.numeric(iris[,5]=='setosa'), nrounds = 2)
 #' xgb.save(bst, 'iris.xgb.model')
 #' bst <- xgb.load('iris.xgb.model')
 #' pred <- predict(bst, as.matrix(iris[,1:4]))

View File

@@ -44,7 +44,7 @@
 #'
 #' @examples
 #' data(iris)
-#' iris[,5] <- as.numeric(iris[,5])
+#' iris[,5] <- as.numeric(iris[,5]=='setosa')
 #' dtrain <- xgb.DMatrix(as.matrix(iris[,1:4]), label=iris[,5])
 #' dtest <- dtrain
 #' watchlist <- list(eval = dtest, train = dtrain)

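The diff context cuts this example off before the training call; for orientation, the watchlist is consumed by xgb.train as in the demo script at the end of this commit (a hedged sketch, with a hypothetical parameter list):

param <- list(max_depth = 2, eta = 1, objective = 'binary:logistic')
bst <- xgb.train(param, dtrain, 2, watchlist)  # prints eval/train metrics each round
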
View File

@@ -34,7 +34,7 @@
 #'
 #' @examples
 #' data(iris)
-#' bst <- xgboost(as.matrix(iris[,1:4]),as.numeric(iris[,5]), nrounds = 2)
+#' bst <- xgboost(as.matrix(iris[,1:4]),as.numeric(iris[,5]=='setosa'), nrounds = 2)
 #' pred <- predict(bst, as.matrix(iris[,1:4]))
 #' @export
 #'

View File

@@ -21,7 +21,7 @@ Get information of an xgb.DMatrix object
 }
 \examples{
 data(iris)
-iris[,5] <- as.numeric(iris[,5])
+iris[,5] <- as.numeric(iris[,5]=='setosa')
 dtrain <- xgb.DMatrix(as.matrix(iris[,1:4]), label=iris[,5])
 labels <- getinfo(dtrain, "label")
 }

View File

@@ -18,15 +18,16 @@ value of sum of functions, when outputmargin=TRUE, the prediction is
 untransformed margin value. In logistic regression, outputmargin=T will
 output value before logistic transformation.}
-\item{ntreelimit}{limit number of trees used in prediction, this parameter is only valid for gbtree, but not for gblinear.
-set it to be value bigger than 0. It will use all trees by default.}
+\item{ntreelimit}{limit number of trees used in prediction, this parameter is
+only valid for gbtree, but not for gblinear. set it to be value bigger
+than 0. It will use all trees by default.}
 }
 \description{
 Predicted values based on xgboost model object.
 }
 \examples{
 data(iris)
-bst <- xgboost(as.matrix(iris[,1:4]),as.numeric(iris[,5]), nrounds = 2)
+bst <- xgboost(as.matrix(iris[,1:4]),as.numeric(iris[,5]=='setosa'), nrounds = 2)
 pred <- predict(bst, as.matrix(iris[,1:4]))
 }

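For a binary:logistic model, the margin and the default prediction are related by the logistic transformation the help text mentions (a hedged illustration, reusing 'bst' from the example above):

margin <- predict(bst, as.matrix(iris[,1:4]), outputmargin = TRUE)
prob   <- 1 / (1 + exp(-margin))  # should match predict(bst, ...) without outputmargin
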
View File

@@ -23,7 +23,7 @@ orginal xgb.DMatrix object
 }
 \examples{
 data(iris)
-iris[,5] <- as.numeric(iris[,5])
+iris[,5] <- as.numeric(iris[,5]=='setosa')
 dtrain <- xgb.DMatrix(as.matrix(iris[,1:4]), label=iris[,5])
 dsub <- slice(dtrain, 1:3)
 }

View File

@@ -20,7 +20,7 @@ Contruct xgb.DMatrix object from dense matrix, sparse matrix or local file.
 }
 \examples{
 data(iris)
-iris[,5] <- as.numeric(iris[,5])
+iris[,5] <- as.numeric(iris[,5]=='setosa')
 dtrain <- xgb.DMatrix(as.matrix(iris[,1:4]), label=iris[,5])
 xgb.DMatrix.save(dtrain, 'iris.xgb.DMatrix')
 dtrain <- xgb.DMatrix('iris.xgb.DMatrix')

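The third construction mode named in this help page (sparse matrix) is exercised in the demo script at the end of this commit; in brief (hedged, reusing that demo's 'x' and 'y'):

dsparse <- xgb.DMatrix(x, label = y)  # x is a Matrix sparse matrix, y a numeric label vector
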
View File

@@ -15,7 +15,7 @@ Save xgb.DMatrix object to binary file
 }
 \examples{
 data(iris)
-iris[,5] <- as.numeric(iris[,5])
+iris[,5] <- as.numeric(iris[,5]=='setosa')
 dtrain <- xgb.DMatrix(as.matrix(iris[,1:4]), label=iris[,5])
 xgb.DMatrix.save(dtrain, 'iris.xgb.DMatrix')
 dtrain <- xgb.DMatrix('iris.xgb.DMatrix')

View File

@@ -21,7 +21,7 @@ Save a xgboost model to text file. Could be parsed later.
 }
 \examples{
 data(iris)
-bst <- xgboost(as.matrix(iris[,1:4]),as.numeric(iris[,5]), nrounds = 2)
+bst <- xgboost(as.matrix(iris[,1:4]),as.numeric(iris[,5]=='setosa'), nrounds = 2)
 xgb.dump(bst, 'iris.xgb.model.dump')
 }

View File

@@ -13,7 +13,7 @@ Load xgboost model from the binary model file
 }
 \examples{
 data(iris)
-bst <- xgboost(as.matrix(iris[,1:4]),as.numeric(iris[,5]), nrounds = 2)
+bst <- xgboost(as.matrix(iris[,1:4]),as.numeric(iris[,5]=='setosa'), nrounds = 2)
 xgb.save(bst, 'iris.xgb.model')
 bst <- xgb.load('iris.xgb.model')
 pred <- predict(bst, as.matrix(iris[,1:4]))

View File

@@ -15,7 +15,7 @@ Save xgboost model from xgboost or xgb.train
 }
 \examples{
 data(iris)
-bst <- xgboost(as.matrix(iris[,1:4]),as.numeric(iris[,5]), nrounds = 2)
+bst <- xgboost(as.matrix(iris[,1:4]),as.numeric(iris[,5]=='setosa'), nrounds = 2)
 xgb.save(bst, 'iris.xgb.model')
 bst <- xgb.load('iris.xgb.model')
 pred <- predict(bst, as.matrix(iris[,1:4]))

View File

@@ -56,7 +56,7 @@ therefore it is more flexible than \code{\link{xgboost}}.
 }
 \examples{
 data(iris)
-iris[,5] <- as.numeric(iris[,5])
+iris[,5] <- as.numeric(iris[,5]=='setosa')
 dtrain <- xgb.DMatrix(as.matrix(iris[,1:4]), label=iris[,5])
 dtest <- dtrain
 watchlist <- list(eval = dtest, train = dtrain)

View File

@@ -46,7 +46,7 @@ Number of threads can also be manually specified via "nthread" parameter
 }
 \examples{
 data(iris)
-bst <- xgboost(as.matrix(iris[,1:4]),as.numeric(iris[,5]), nrounds = 2)
+bst <- xgboost(as.matrix(iris[,1:4]),as.numeric(iris[,5]=='setosa'), nrounds = 2)
 pred <- predict(bst, as.matrix(iris[,1:4]))
 }

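The "nthread" note in the context line above can be exercised roughly like this (a hedged sketch; assumes nthread is passed through the params list, as with xgb.train):

bst <- xgboost(as.matrix(iris[,1:4]), as.numeric(iris[,5]=='setosa'),
               params = list(nthread = 2), nrounds = 2)  # train on 2 threads
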
View File

@@ -80,7 +80,7 @@ In this section, we will illustrate some common usage of \verb@xgboost@.
 <<Training and prediction with iris>>=
 library(xgboost)
 data(iris)
-bst <- xgboost(as.matrix(iris[,1:4]),as.numeric(iris[,5]),
+bst <- xgboost(as.matrix(iris[,1:4]),as.numeric(iris[,5]=='setosa'),
 nrounds = 5)
 xgb.save(bst, 'model.save')
 bst = xgb.load('model.save')
@@ -121,7 +121,7 @@ training from initial prediction value, weighted training instance.
 We can use \verb@xgb.DMatrix@ to construct an \verb@xgb.DMatrix@ object:
 <<xgb.DMatrix>>=
 iris.mat <- as.matrix(iris[,1:4])
-iris.label <- as.numeric(iris[,5])
+iris.label <- as.numeric(iris[,5]=='setosa')
 diris <- xgb.DMatrix(iris.mat, label = iris.label)
 class(diris)
 getinfo(diris,'label')

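The advanced features named in the second hunk's context (initial prediction value, weighted training instances) go through the same info mechanism as getinfo (a hedged sketch; a setinfo counterpart and the 'weight' and 'base_margin' field names are assumptions, not confirmed by this diff):

setinfo(diris, 'weight', rep(1, nrow(iris.mat)))       # per-instance training weights
setinfo(diris, 'base_margin', rep(0, nrow(iris.mat)))  # initial prediction margin
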
View File

@@ -0,0 +1,53 @@
+require(xgboost)
+require(Matrix)  # needed below for as(mat, "sparseMatrix")
+# basic training/prediction round trip on the agaricus data
+dtrain <- xgb.DMatrix('../data/agaricus.txt.train')
+dtest <- xgb.DMatrix('../data/agaricus.txt.test')
+param <- list(max_depth=2, eta=1, silent=1, objective='binary:logistic')
+watchlist <- list(eval = dtest, train = dtrain)
+num_round <- 2
+bst <- xgb.train(param, dtrain, num_round, watchlist)
+preds <- predict(bst, dtest)
+labels <- getinfo(dtest, 'label')
+cat('error=', mean(as.numeric(preds > 0.5) != labels), '\n')
+# save/dump the model, then reload and check the predictions are identical
+xgb.save(bst, 'xgb.model')
+xgb.dump(bst, 'dump.raw.txt')
+xgb.dump(bst, 'dump.nice.txt', '../data/featmap.txt')
+bst2 <- xgb.load('xgb.model')
+preds2 <- predict(bst2, dtest)
+stopifnot(sum((preds - preds2)^2) == 0)
+cat('start running example of build DMatrix from a sparse Matrix\n')
+# minimal libsvm reader: returns the labels and a sparse feature matrix
+read.libsvm <- function(fname, maxcol) {
+  content <- readLines(fname)
+  nline <- length(content)
+  label <- numeric(nline)
+  mat <- matrix(0, nline, maxcol + 1)
+  for (i in 1:nline) {
+    arr <- as.vector(strsplit(content[i], " ")[[1]])
+    label[i] <- as.numeric(arr[[1]])
+    for (j in 2:length(arr)) {
+      kv <- strsplit(arr[j], ":")[[1]]
+      # shift feature indices by one to avoid a 0 index
+      findex <- as.integer(kv[1]) + 1
+      fvalue <- as.numeric(kv[2])
+      mat[i, findex] <- fvalue
+    }
+  }
+  mat <- as(mat, "sparseMatrix")
+  return(list(label = label, data = mat))
+}
+csc <- read.libsvm("../data/agaricus.txt.train", 126)
+y <- csc$label
+x <- csc$data
+class(x)
+dtrain <- xgb.DMatrix(x, label = y)
+bst <- xgb.train(param, dtrain, num_round, watchlist)
+cat('start running example of build DMatrix from a dense matrix\n')
+x <- as.matrix(x)
+class(x)
+dtrain <- xgb.DMatrix(x, label = y)
+bst <- xgb.train(param, dtrain, num_round, watchlist)