add leaf example in R

This commit is contained in:
hetong 2015-01-19 10:34:14 -08:00
parent 312546b99d
commit 43c13d82ba
2 changed files with 35 additions and 3 deletions

View File

@ -11,7 +11,7 @@ setClass("xgb.Booster")
#' value of sum of functions, when outputmargin=TRUE, the prediction is #' value of sum of functions, when outputmargin=TRUE, the prediction is
#' untransformed margin value. In logistic regression, outputmargin=T will #' untransformed margin value. In logistic regression, outputmargin=T will
#' output value before logistic transformation. #' output value before logistic transformation.
#' @param predleaf whether predict leaf index instead #' @param predleaf whether predict leaf index instead. If set to TRUE, the output will be a matrix object.
#' @param ntreelimit limit number of trees used in prediction, this parameter is #' @param ntreelimit limit number of trees used in prediction, this parameter is
#' only valid for gbtree, but not for gblinear. set it to be value bigger #' only valid for gbtree, but not for gblinear. set it to be value bigger
#' than 0. It will use all trees by default. #' than 0. It will use all trees by default.
@ -26,7 +26,8 @@ setClass("xgb.Booster")
#' @export #' @export
#' #'
setMethod("predict", signature = "xgb.Booster", setMethod("predict", signature = "xgb.Booster",
definition = function(object, newdata, missing = NULL, outputmargin = FALSE, ntreelimit = NULL, predleaf = FALSE) { definition = function(object, newdata, missing = NULL,
outputmargin = FALSE, ntreelimit = NULL, predleaf = FALSE) {
if (class(newdata) != "xgb.DMatrix") { if (class(newdata) != "xgb.DMatrix") {
if (is.null(missing)) { if (is.null(missing)) {
newdata <- xgb.DMatrix(newdata) newdata <- xgb.DMatrix(newdata)
@ -48,7 +49,16 @@ setMethod("predict", signature = "xgb.Booster",
if (predleaf) { if (predleaf) {
option <- option + 2 option <- option + 2
} }
ret <- .Call("XGBoosterPredict_R", object, newdata, as.integer(option), as.integer(ntreelimit), PACKAGE = "xgboost") ret <- .Call("XGBoosterPredict_R", object, newdata, as.integer(option),
as.integer(ntreelimit), PACKAGE = "xgboost")
if (predleaf){
if (length(ret) == nrow(newdata)){
ret <- matrix(ret,ncol = 1)
} else {
ret <- matrix(ret, ncol = nrow(newdata))
ret <- t(ret)
}
}
return(ret) return(ret)
}) })

View File

@ -0,0 +1,22 @@
# Demo: predicting leaf indices with xgboost.
#
# Trains a small boosted-tree classifier on the agaricus (mushroom) data and
# then calls predict() with predleaf = TRUE, which returns leaf indices (as a
# matrix) instead of the usual predictions.

# library() stops with an error if the package is missing, whereas require()
# would only return FALSE and let the script fail later with a less clear error.
library(xgboost)

# load in the agaricus dataset
data(agaricus.train, package = "xgboost")
data(agaricus.test, package = "xgboost")
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)

param <- list(
  max.depth = 2,
  eta = 1,
  silent = 1,
  objective = "binary:logistic"
)
watchlist <- list(eval = dtest, train = dtrain)
nround <- 5

# train the model for `nround` boosting rounds
bst <- xgb.train(param, dtrain, nround, watchlist)

cat("start testing prediction from first n trees\n")
# labels of the test set; not used further in this script, kept for inspection
labels <- getinfo(dtest, "label")

### predict leaf indices using only the first 2 trees
pred_with_leaf <- predict(bst, dtest, ntreelimit = 2, predleaf = TRUE)
head(pred_with_leaf)

# by default, we predict using all the trees
pred_with_leaf <- predict(bst, dtest, predleaf = TRUE)
head(pred_with_leaf)