Merge branch 'master' of ssh://github.com/tqchen/xgboost

This commit is contained in:
tqchen 2014-09-05 20:34:46 -07:00
commit bc1817ca2f
22 changed files with 81 additions and 26 deletions

View File

@ -6,7 +6,7 @@ setClass('xgb.DMatrix')
#' #'
#' @examples #' @examples
#' data(iris) #' data(iris)
#' iris[,5] <- as.numeric(iris[,5]) #' iris[,5] <- as.numeric(iris[,5]=='setosa')
#' dtrain <- xgb.DMatrix(as.matrix(iris[,1:4]), label=iris[,5]) #' dtrain <- xgb.DMatrix(as.matrix(iris[,1:4]), label=iris[,5])
#' labels <- getinfo(dtrain, "label") #' labels <- getinfo(dtrain, "label")
#' @rdname getinfo #' @rdname getinfo

View File

@ -11,11 +11,12 @@ setClass("xgb.Booster")
#' value of sum of functions, when outputmargin=TRUE, the prediction is #' value of sum of functions, when outputmargin=TRUE, the prediction is
#' untransformed margin value. In logistic regression, outputmargin=T will #' untransformed margin value. In logistic regression, outputmargin=T will
#' output value before logistic transformation. #' output value before logistic transformation.
#' @param ntreelimit limit number of trees used in prediction, this parameter is only valid for gbtree, but not for gblinear. #' @param ntreelimit limit number of trees used in prediction, this parameter is
#' set it to be value bigger than 0. It will use all trees by default. #' only valid for gbtree, but not for gblinear. set it to be value bigger
#' than 0. It will use all trees by default.
#' @examples #' @examples
#' data(iris) #' data(iris)
#' bst <- xgboost(as.matrix(iris[,1:4]),as.numeric(iris[,5]), nrounds = 2) #' bst <- xgboost(as.matrix(iris[,1:4]),as.numeric(iris[,5]=='setosa'), nrounds = 2)
#' pred <- predict(bst, as.matrix(iris[,1:4])) #' pred <- predict(bst, as.matrix(iris[,1:4]))
#' @export #' @export
#' #'

View File

@ -8,7 +8,7 @@ setClass('xgb.DMatrix')
#' #'
#' @examples #' @examples
#' data(iris) #' data(iris)
#' iris[,5] <- as.numeric(iris[,5]) #' iris[,5] <- as.numeric(iris[,5]=='setosa')
#' dtrain <- xgb.DMatrix(as.matrix(iris[,1:4]), label=iris[,5]) #' dtrain <- xgb.DMatrix(as.matrix(iris[,1:4]), label=iris[,5])
#' dsub <- slice(dtrain, 1:3) #' dsub <- slice(dtrain, 1:3)
#' @rdname slice #' @rdname slice

View File

@ -12,7 +12,7 @@
#' #'
#' @examples #' @examples
#' data(iris) #' data(iris)
#' iris[,5] <- as.numeric(iris[,5]) #' iris[,5] <- as.numeric(iris[,5]=='setosa')
#' dtrain <- xgb.DMatrix(as.matrix(iris[,1:4]), label=iris[,5]) #' dtrain <- xgb.DMatrix(as.matrix(iris[,1:4]), label=iris[,5])
#' xgb.DMatrix.save(dtrain, 'iris.xgb.DMatrix') #' xgb.DMatrix.save(dtrain, 'iris.xgb.DMatrix')
#' dtrain <- xgb.DMatrix('iris.xgb.DMatrix') #' dtrain <- xgb.DMatrix('iris.xgb.DMatrix')

View File

@ -7,7 +7,7 @@
#' #'
#' @examples #' @examples
#' data(iris) #' data(iris)
#' iris[,5] <- as.numeric(iris[,5]) #' iris[,5] <- as.numeric(iris[,5]=='setosa')
#' dtrain <- xgb.DMatrix(as.matrix(iris[,1:4]), label=iris[,5]) #' dtrain <- xgb.DMatrix(as.matrix(iris[,1:4]), label=iris[,5])
#' xgb.DMatrix.save(dtrain, 'iris.xgb.DMatrix') #' xgb.DMatrix.save(dtrain, 'iris.xgb.DMatrix')
#' dtrain <- xgb.DMatrix('iris.xgb.DMatrix') #' dtrain <- xgb.DMatrix('iris.xgb.DMatrix')

View File

@ -13,7 +13,7 @@
#' #'
#' @examples #' @examples
#' data(iris) #' data(iris)
#' bst <- xgboost(as.matrix(iris[,1:4]),as.numeric(iris[,5]), nrounds = 2) #' bst <- xgboost(as.matrix(iris[,1:4]),as.numeric(iris[,5]=='setosa'), nrounds = 2)
#' xgb.dump(bst, 'iris.xgb.model.dump') #' xgb.dump(bst, 'iris.xgb.model.dump')
#' @export #' @export
#' #'

View File

@ -6,7 +6,7 @@
#' #'
#' @examples #' @examples
#' data(iris) #' data(iris)
#' bst <- xgboost(as.matrix(iris[,1:4]),as.numeric(iris[,5]), nrounds = 2) #' bst <- xgboost(as.matrix(iris[,1:4]),as.numeric(iris[,5]=='setosa'), nrounds = 2)
#' xgb.save(bst, 'iris.xgb.model') #' xgb.save(bst, 'iris.xgb.model')
#' bst <- xgb.load('iris.xgb.model') #' bst <- xgb.load('iris.xgb.model')
#' pred <- predict(bst, as.matrix(iris[,1:4])) #' pred <- predict(bst, as.matrix(iris[,1:4]))

View File

@ -7,7 +7,7 @@
#' #'
#' @examples #' @examples
#' data(iris) #' data(iris)
#' bst <- xgboost(as.matrix(iris[,1:4]),as.numeric(iris[,5]), nrounds = 2) #' bst <- xgboost(as.matrix(iris[,1:4]),as.numeric(iris[,5]=='setosa'), nrounds = 2)
#' xgb.save(bst, 'iris.xgb.model') #' xgb.save(bst, 'iris.xgb.model')
#' bst <- xgb.load('iris.xgb.model') #' bst <- xgb.load('iris.xgb.model')
#' pred <- predict(bst, as.matrix(iris[,1:4])) #' pred <- predict(bst, as.matrix(iris[,1:4]))

View File

@ -44,7 +44,7 @@
#' #'
#' @examples #' @examples
#' data(iris) #' data(iris)
#' iris[,5] <- as.numeric(iris[,5]) #' iris[,5] <- as.numeric(iris[,5]=='setosa')
#' dtrain <- xgb.DMatrix(as.matrix(iris[,1:4]), label=iris[,5]) #' dtrain <- xgb.DMatrix(as.matrix(iris[,1:4]), label=iris[,5])
#' dtest <- dtrain #' dtest <- dtrain
#' watchlist <- list(eval = dtest, train = dtrain) #' watchlist <- list(eval = dtest, train = dtrain)

View File

@ -34,7 +34,7 @@
#' #'
#' @examples #' @examples
#' data(iris) #' data(iris)
#' bst <- xgboost(as.matrix(iris[,1:4]),as.numeric(iris[,5]), nrounds = 2) #' bst <- xgboost(as.matrix(iris[,1:4]),as.numeric(iris[,5]=='setosa'), nrounds = 2)
#' pred <- predict(bst, as.matrix(iris[,1:4])) #' pred <- predict(bst, as.matrix(iris[,1:4]))
#' @export #' @export
#' #'

View File

@ -21,7 +21,7 @@ Get information of an xgb.DMatrix object
} }
\examples{ \examples{
data(iris) data(iris)
iris[,5] <- as.numeric(iris[,5]) iris[,5] <- as.numeric(iris[,5]=='setosa')
dtrain <- xgb.DMatrix(as.matrix(iris[,1:4]), label=iris[,5]) dtrain <- xgb.DMatrix(as.matrix(iris[,1:4]), label=iris[,5])
labels <- getinfo(dtrain, "label") labels <- getinfo(dtrain, "label")
} }

View File

@ -18,15 +18,16 @@ value of sum of functions, when outputmargin=TRUE, the prediction is
untransformed margin value. In logistic regression, outputmargin=T will untransformed margin value. In logistic regression, outputmargin=T will
output value before logistic transformation.} output value before logistic transformation.}
\item{ntreelimit}{limit number of trees used in prediction, this parameter is only valid for gbtree, but not for gblinear. \item{ntreelimit}{limit number of trees used in prediction, this parameter is
set it to be value bigger than 0. It will use all trees by default.} only valid for gbtree, but not for gblinear. set it to be value bigger
than 0. It will use all trees by default.}
} }
\description{ \description{
Predicted values based on xgboost model object. Predicted values based on xgboost model object.
} }
\examples{ \examples{
data(iris) data(iris)
bst <- xgboost(as.matrix(iris[,1:4]),as.numeric(iris[,5]), nrounds = 2) bst <- xgboost(as.matrix(iris[,1:4]),as.numeric(iris[,5]=='setosa'), nrounds = 2)
pred <- predict(bst, as.matrix(iris[,1:4])) pred <- predict(bst, as.matrix(iris[,1:4]))
} }

View File

@ -23,7 +23,7 @@ original xgb.DMatrix object } }
} }
\examples{ \examples{
data(iris) data(iris)
iris[,5] <- as.numeric(iris[,5]) iris[,5] <- as.numeric(iris[,5]=='setosa')
dtrain <- xgb.DMatrix(as.matrix(iris[,1:4]), label=iris[,5]) dtrain <- xgb.DMatrix(as.matrix(iris[,1:4]), label=iris[,5])
dsub <- slice(dtrain, 1:3) dsub <- slice(dtrain, 1:3)
} }

View File

@ -20,7 +20,7 @@ Construct xgb.DMatrix object from dense matrix, sparse matrix or local file. } }
} }
\examples{ \examples{
data(iris) data(iris)
iris[,5] <- as.numeric(iris[,5]) iris[,5] <- as.numeric(iris[,5]=='setosa')
dtrain <- xgb.DMatrix(as.matrix(iris[,1:4]), label=iris[,5]) dtrain <- xgb.DMatrix(as.matrix(iris[,1:4]), label=iris[,5])
xgb.DMatrix.save(dtrain, 'iris.xgb.DMatrix') xgb.DMatrix.save(dtrain, 'iris.xgb.DMatrix')
dtrain <- xgb.DMatrix('iris.xgb.DMatrix') dtrain <- xgb.DMatrix('iris.xgb.DMatrix')

View File

@ -15,7 +15,7 @@ Save xgb.DMatrix object to binary file
} }
\examples{ \examples{
data(iris) data(iris)
iris[,5] <- as.numeric(iris[,5]) iris[,5] <- as.numeric(iris[,5]=='setosa')
dtrain <- xgb.DMatrix(as.matrix(iris[,1:4]), label=iris[,5]) dtrain <- xgb.DMatrix(as.matrix(iris[,1:4]), label=iris[,5])
xgb.DMatrix.save(dtrain, 'iris.xgb.DMatrix') xgb.DMatrix.save(dtrain, 'iris.xgb.DMatrix')
dtrain <- xgb.DMatrix('iris.xgb.DMatrix') dtrain <- xgb.DMatrix('iris.xgb.DMatrix')

View File

@ -21,7 +21,7 @@ Save a xgboost model to text file. Could be parsed later.
} }
\examples{ \examples{
data(iris) data(iris)
bst <- xgboost(as.matrix(iris[,1:4]),as.numeric(iris[,5]), nrounds = 2) bst <- xgboost(as.matrix(iris[,1:4]),as.numeric(iris[,5]=='setosa'), nrounds = 2)
xgb.dump(bst, 'iris.xgb.model.dump') xgb.dump(bst, 'iris.xgb.model.dump')
} }

View File

@ -13,7 +13,7 @@ Load xgboost model from the binary model file
} }
\examples{ \examples{
data(iris) data(iris)
bst <- xgboost(as.matrix(iris[,1:4]),as.numeric(iris[,5]), nrounds = 2) bst <- xgboost(as.matrix(iris[,1:4]),as.numeric(iris[,5]=='setosa'), nrounds = 2)
xgb.save(bst, 'iris.xgb.model') xgb.save(bst, 'iris.xgb.model')
bst <- xgb.load('iris.xgb.model') bst <- xgb.load('iris.xgb.model')
pred <- predict(bst, as.matrix(iris[,1:4])) pred <- predict(bst, as.matrix(iris[,1:4]))

View File

@ -15,7 +15,7 @@ Save xgboost model from xgboost or xgb.train
} }
\examples{ \examples{
data(iris) data(iris)
bst <- xgboost(as.matrix(iris[,1:4]),as.numeric(iris[,5]), nrounds = 2) bst <- xgboost(as.matrix(iris[,1:4]),as.numeric(iris[,5]=='setosa'), nrounds = 2)
xgb.save(bst, 'iris.xgb.model') xgb.save(bst, 'iris.xgb.model')
bst <- xgb.load('iris.xgb.model') bst <- xgb.load('iris.xgb.model')
pred <- predict(bst, as.matrix(iris[,1:4])) pred <- predict(bst, as.matrix(iris[,1:4]))

View File

@ -56,7 +56,7 @@ therefore it is more flexible than \code{\link{xgboost}}.
} }
\examples{ \examples{
data(iris) data(iris)
iris[,5] <- as.numeric(iris[,5]) iris[,5] <- as.numeric(iris[,5]=='setosa')
dtrain <- xgb.DMatrix(as.matrix(iris[,1:4]), label=iris[,5]) dtrain <- xgb.DMatrix(as.matrix(iris[,1:4]), label=iris[,5])
dtest <- dtrain dtest <- dtrain
watchlist <- list(eval = dtest, train = dtrain) watchlist <- list(eval = dtest, train = dtrain)

View File

@ -46,7 +46,7 @@ Number of threads can also be manually specified via "nthread" parameter
} }
\examples{ \examples{
data(iris) data(iris)
bst <- xgboost(as.matrix(iris[,1:4]),as.numeric(iris[,5]), nrounds = 2) bst <- xgboost(as.matrix(iris[,1:4]),as.numeric(iris[,5]=='setosa'), nrounds = 2)
pred <- predict(bst, as.matrix(iris[,1:4])) pred <- predict(bst, as.matrix(iris[,1:4]))
} }

View File

@ -80,7 +80,7 @@ In this section, we will illustrate some common usage of \verb@xgboost@.
<<Training and prediction with iris>>= <<Training and prediction with iris>>=
library(xgboost) library(xgboost)
data(iris) data(iris)
bst <- xgboost(as.matrix(iris[,1:4]),as.numeric(iris[,5]), bst <- xgboost(as.matrix(iris[,1:4]),as.numeric(iris[,5]=='setosa'),
nrounds = 5) nrounds = 5)
xgb.save(bst, 'model.save') xgb.save(bst, 'model.save')
bst = xgb.load('model.save') bst = xgb.load('model.save')
@ -121,7 +121,7 @@ training from initial prediction value, weighted training instance.
We can use \verb@xgb.DMatrix@ to construct an \verb@xgb.DMatrix@ object: We can use \verb@xgb.DMatrix@ to construct an \verb@xgb.DMatrix@ object:
<<xgb.DMatrix>>= <<xgb.DMatrix>>=
iris.mat <- as.matrix(iris[,1:4]) iris.mat <- as.matrix(iris[,1:4])
iris.label <- as.numeric(iris[,5]) iris.label <- as.numeric(iris[,5]=='setosa')
diris <- xgb.DMatrix(iris.mat, label = iris.label) diris <- xgb.DMatrix(iris.mat, label = iris.label)
class(diris) class(diris)
getinfo(diris,'label') getinfo(diris,'label')

View File

@ -0,0 +1,53 @@
require(xgboost)
dtrain <- xgb.DMatrix('../data/agaricus.txt.train')
dtest <- xgb.DMatrix('../data/agaricus.txt.test')
param <- list(max_depth=2,eta=1,silent=1,objective='binary:logistic')
watchlist <- list(eval = dtest, train = dtrain)
num_round <- 2
bst <- xgb.train(param, dtrain, num_round, watchlist)
preds <- predict(bst, dtest)
labels <- getinfo(dtest,'label')
cat('error=', mean(as.numeric(preds>0.5)!=labels),'\n')
xgb.save(bst, 'xgb.model')
xgb.dump(bst, 'dump.raw.txt')
xgb.dump(bst, 'dump.nuce.txt','../data/featmap.txt')
bst2 <- xgb.load('xgb.model')
preds2 <- predict(bst2,dtest)
stopifnot(sum((preds-preds2)^2)==0)
cat('start running example of build DMatrix from scipy.sparse CSR Matrix\n')
read.libsvm <- function(fname, maxcol) {
content <- readLines(fname)
nline <- length(content)
label <- numeric(nline)
mat <- matrix(0, nline, maxcol + 1)
for (i in 1:nline) {
arr <- as.vector(strsplit(content[i], " ")[[1]])
label[i] <- as.numeric(arr[[1]])
for (j in 2:length(arr)) {
kv <- strsplit(arr[j], ":")[[1]]
# to avoid 0 index
findex <- as.integer(kv[1]) + 1
fvalue <- as.numeric(kv[2])
mat[i, findex] <- fvalue
}
}
mat <- as(mat, "sparseMatrix")
return(list(label = label, data = mat))
}
csc <- read.libsvm("../data/agaricus.txt.train", 126)
y <- csc$label
x <- csc$data
class(x)
dtrain <- xgb.DMatrix(x, label = y)
bst <- xgb.train(param, dtrain, num_round, watchlist)
cat('start running example of build DMatrix from numpy array\n')
x <- as.matrix(x)
class(x)
dtrain <- xgb.DMatrix(x, label = y)
bst <- xgb.train(param, dtrain, num_round, watchlist)