diff --git a/R-package/R/getinfo.xgb.DMatrix.R b/R-package/R/getinfo.xgb.DMatrix.R index 3a79fd2fb..2a7ae8e5e 100644 --- a/R-package/R/getinfo.xgb.DMatrix.R +++ b/R-package/R/getinfo.xgb.DMatrix.R @@ -6,7 +6,7 @@ setClass('xgb.DMatrix') #' #' @examples #' data(iris) -#' iris[,5] <- as.numeric(iris[,5]) +#' iris[,5] <- as.numeric(iris[,5]=='setosa') #' dtrain <- xgb.DMatrix(as.matrix(iris[,1:4]), label=iris[,5]) #' labels <- getinfo(dtrain, "label") #' @rdname getinfo diff --git a/R-package/R/predict.xgb.Booster.R b/R-package/R/predict.xgb.Booster.R index 390ac689e..a41b26873 100644 --- a/R-package/R/predict.xgb.Booster.R +++ b/R-package/R/predict.xgb.Booster.R @@ -11,11 +11,12 @@ setClass("xgb.Booster") #' value of sum of functions, when outputmargin=TRUE, the prediction is #' untransformed margin value. In logistic regression, outputmargin=T will #' output value before logistic transformation. -#' @param ntreelimit limit number of trees used in prediction, this parameter is only valid for gbtree, but not for gblinear. -#' set it to be value bigger than 0. It will use all trees by default. +#' @param ntreelimit limit number of trees used in prediction, this parameter is +#' only valid for gbtree, but not for gblinear. set it to be value bigger +#' than 0. It will use all trees by default. 
#' @examples #' data(iris) -#' bst <- xgboost(as.matrix(iris[,1:4]),as.numeric(iris[,5]), nrounds = 2) +#' bst <- xgboost(as.matrix(iris[,1:4]),as.numeric(iris[,5]=='setosa'), nrounds = 2) #' pred <- predict(bst, as.matrix(iris[,1:4])) #' @export #' diff --git a/R-package/R/slice.xgb.DMatrix.R b/R-package/R/slice.xgb.DMatrix.R index 8a93efc4d..72f94893a 100644 --- a/R-package/R/slice.xgb.DMatrix.R +++ b/R-package/R/slice.xgb.DMatrix.R @@ -8,7 +8,7 @@ setClass('xgb.DMatrix') #' #' @examples #' data(iris) -#' iris[,5] <- as.numeric(iris[,5]) +#' iris[,5] <- as.numeric(iris[,5]=='setosa') #' dtrain <- xgb.DMatrix(as.matrix(iris[,1:4]), label=iris[,5]) #' dsub <- slice(dtrain, 1:3) #' @rdname slice diff --git a/R-package/R/xgb.DMatrix.R b/R-package/R/xgb.DMatrix.R index d52847ef2..3b320d73f 100644 --- a/R-package/R/xgb.DMatrix.R +++ b/R-package/R/xgb.DMatrix.R @@ -12,7 +12,7 @@ #' #' @examples #' data(iris) -#' iris[,5] <- as.numeric(iris[,5]) +#' iris[,5] <- as.numeric(iris[,5]=='setosa') #' dtrain <- xgb.DMatrix(as.matrix(iris[,1:4]), label=iris[,5]) #' xgb.DMatrix.save(dtrain, 'iris.xgb.DMatrix') #' dtrain <- xgb.DMatrix('iris.xgb.DMatrix') diff --git a/R-package/R/xgb.DMatrix.save.R b/R-package/R/xgb.DMatrix.save.R index 4fcb71301..4f4f49399 100644 --- a/R-package/R/xgb.DMatrix.save.R +++ b/R-package/R/xgb.DMatrix.save.R @@ -7,7 +7,7 @@ #' #' @examples #' data(iris) -#' iris[,5] <- as.numeric(iris[,5]) +#' iris[,5] <- as.numeric(iris[,5]=='setosa') #' dtrain <- xgb.DMatrix(as.matrix(iris[,1:4]), label=iris[,5]) #' xgb.DMatrix.save(dtrain, 'iris.xgb.DMatrix') #' dtrain <- xgb.DMatrix('iris.xgb.DMatrix') diff --git a/R-package/R/xgb.dump.R b/R-package/R/xgb.dump.R index 09406dc99..78fcf4d0b 100644 --- a/R-package/R/xgb.dump.R +++ b/R-package/R/xgb.dump.R @@ -13,7 +13,7 @@ #' #' @examples #' data(iris) -#' bst <- xgboost(as.matrix(iris[,1:4]),as.numeric(iris[,5]), nrounds = 2) +#' bst <- xgboost(as.matrix(iris[,1:4]),as.numeric(iris[,5]=='setosa'), nrounds = 2) #' 
xgb.dump(bst, 'iris.xgb.model.dump') #' @export #' diff --git a/R-package/R/xgb.load.R b/R-package/R/xgb.load.R index 626c08d0d..54afe65dd 100644 --- a/R-package/R/xgb.load.R +++ b/R-package/R/xgb.load.R @@ -6,7 +6,7 @@ #' #' @examples #' data(iris) -#' bst <- xgboost(as.matrix(iris[,1:4]),as.numeric(iris[,5]), nrounds = 2) +#' bst <- xgboost(as.matrix(iris[,1:4]),as.numeric(iris[,5]=='setosa'), nrounds = 2) #' xgb.save(bst, 'iris.xgb.model') #' bst <- xgb.load('iris.xgb.model') #' pred <- predict(bst, as.matrix(iris[,1:4])) diff --git a/R-package/R/xgb.save.R b/R-package/R/xgb.save.R index 64add0ca9..c211429ad 100644 --- a/R-package/R/xgb.save.R +++ b/R-package/R/xgb.save.R @@ -7,7 +7,7 @@ #' #' @examples #' data(iris) -#' bst <- xgboost(as.matrix(iris[,1:4]),as.numeric(iris[,5]), nrounds = 2) +#' bst <- xgboost(as.matrix(iris[,1:4]),as.numeric(iris[,5]=='setosa'), nrounds = 2) #' xgb.save(bst, 'iris.xgb.model') #' bst <- xgb.load('iris.xgb.model') #' pred <- predict(bst, as.matrix(iris[,1:4])) diff --git a/R-package/R/xgb.train.R b/R-package/R/xgb.train.R index d29bad569..5a7b03090 100644 --- a/R-package/R/xgb.train.R +++ b/R-package/R/xgb.train.R @@ -44,7 +44,7 @@ #' #' @examples #' data(iris) -#' iris[,5] <- as.numeric(iris[,5]) +#' iris[,5] <- as.numeric(iris[,5]=='setosa') #' dtrain <- xgb.DMatrix(as.matrix(iris[,1:4]), label=iris[,5]) #' dtest <- dtrain #' watchlist <- list(eval = dtest, train = dtrain) diff --git a/R-package/R/xgboost.R b/R-package/R/xgboost.R index f3b5c66ec..e8888afcd 100644 --- a/R-package/R/xgboost.R +++ b/R-package/R/xgboost.R @@ -34,7 +34,7 @@ #' #' @examples #' data(iris) -#' bst <- xgboost(as.matrix(iris[,1:4]),as.numeric(iris[,5]), nrounds = 2) +#' bst <- xgboost(as.matrix(iris[,1:4]),as.numeric(iris[,5]=='setosa'), nrounds = 2) #' pred <- predict(bst, as.matrix(iris[,1:4])) #' @export #' diff --git a/R-package/man/getinfo.Rd b/R-package/man/getinfo.Rd index 05a25c152..7206d6b17 100644 --- a/R-package/man/getinfo.Rd +++ 
b/R-package/man/getinfo.Rd @@ -21,7 +21,7 @@ Get information of an xgb.DMatrix object } \examples{ data(iris) -iris[,5] <- as.numeric(iris[,5]) +iris[,5] <- as.numeric(iris[,5]=='setosa') dtrain <- xgb.DMatrix(as.matrix(iris[,1:4]), label=iris[,5]) labels <- getinfo(dtrain, "label") } diff --git a/R-package/man/predict-xgb.Booster-method.Rd b/R-package/man/predict-xgb.Booster-method.Rd index d192997d2..9c19b8f33 100644 --- a/R-package/man/predict-xgb.Booster-method.Rd +++ b/R-package/man/predict-xgb.Booster-method.Rd @@ -18,15 +18,16 @@ value of sum of functions, when outputmargin=TRUE, the prediction is untransformed margin value. In logistic regression, outputmargin=T will output value before logistic transformation.} -\item{ntreelimit}{limit number of trees used in prediction, this parameter is only valid for gbtree, but not for gblinear. -set it to be value bigger than 0. It will use all trees by default.} +\item{ntreelimit}{limit number of trees used in prediction, this parameter is +only valid for gbtree, but not for gblinear. set it to be value bigger +than 0. It will use all trees by default.} } \description{ Predicted values based on xgboost model object. 
} \examples{ data(iris) -bst <- xgboost(as.matrix(iris[,1:4]),as.numeric(iris[,5]), nrounds = 2) +bst <- xgboost(as.matrix(iris[,1:4]),as.numeric(iris[,5]=='setosa'), nrounds = 2) pred <- predict(bst, as.matrix(iris[,1:4])) } diff --git a/R-package/man/slice.Rd b/R-package/man/slice.Rd index 7acb14a32..a4d0a4568 100644 --- a/R-package/man/slice.Rd +++ b/R-package/man/slice.Rd @@ -23,7 +23,7 @@ orginal xgb.DMatrix object } \examples{ data(iris) -iris[,5] <- as.numeric(iris[,5]) +iris[,5] <- as.numeric(iris[,5]=='setosa') dtrain <- xgb.DMatrix(as.matrix(iris[,1:4]), label=iris[,5]) dsub <- slice(dtrain, 1:3) } diff --git a/R-package/man/xgb.DMatrix.Rd b/R-package/man/xgb.DMatrix.Rd index 166d69f68..ea7ff8ce6 100644 --- a/R-package/man/xgb.DMatrix.Rd +++ b/R-package/man/xgb.DMatrix.Rd @@ -20,7 +20,7 @@ Contruct xgb.DMatrix object from dense matrix, sparse matrix or local file. } \examples{ data(iris) -iris[,5] <- as.numeric(iris[,5]) +iris[,5] <- as.numeric(iris[,5]=='setosa') dtrain <- xgb.DMatrix(as.matrix(iris[,1:4]), label=iris[,5]) xgb.DMatrix.save(dtrain, 'iris.xgb.DMatrix') dtrain <- xgb.DMatrix('iris.xgb.DMatrix') diff --git a/R-package/man/xgb.DMatrix.save.Rd b/R-package/man/xgb.DMatrix.save.Rd index e5e70501d..2692069dc 100644 --- a/R-package/man/xgb.DMatrix.save.Rd +++ b/R-package/man/xgb.DMatrix.save.Rd @@ -15,7 +15,7 @@ Save xgb.DMatrix object to binary file } \examples{ data(iris) -iris[,5] <- as.numeric(iris[,5]) +iris[,5] <- as.numeric(iris[,5]=='setosa') dtrain <- xgb.DMatrix(as.matrix(iris[,1:4]), label=iris[,5]) xgb.DMatrix.save(dtrain, 'iris.xgb.DMatrix') dtrain <- xgb.DMatrix('iris.xgb.DMatrix') diff --git a/R-package/man/xgb.dump.Rd b/R-package/man/xgb.dump.Rd index 4d6933811..a4ac12cd4 100644 --- a/R-package/man/xgb.dump.Rd +++ b/R-package/man/xgb.dump.Rd @@ -21,7 +21,7 @@ Save a xgboost model to text file. Could be parsed later. 
} \examples{ data(iris) -bst <- xgboost(as.matrix(iris[,1:4]),as.numeric(iris[,5]), nrounds = 2) +bst <- xgboost(as.matrix(iris[,1:4]),as.numeric(iris[,5]=='setosa'), nrounds = 2) xgb.dump(bst, 'iris.xgb.model.dump') } diff --git a/R-package/man/xgb.load.Rd b/R-package/man/xgb.load.Rd index 980daf88d..a8969c07d 100644 --- a/R-package/man/xgb.load.Rd +++ b/R-package/man/xgb.load.Rd @@ -13,7 +13,7 @@ Load xgboost model from the binary model file } \examples{ data(iris) -bst <- xgboost(as.matrix(iris[,1:4]),as.numeric(iris[,5]), nrounds = 2) +bst <- xgboost(as.matrix(iris[,1:4]),as.numeric(iris[,5]=='setosa'), nrounds = 2) xgb.save(bst, 'iris.xgb.model') bst <- xgb.load('iris.xgb.model') pred <- predict(bst, as.matrix(iris[,1:4])) diff --git a/R-package/man/xgb.save.Rd b/R-package/man/xgb.save.Rd index ba390d1b4..0dca58287 100644 --- a/R-package/man/xgb.save.Rd +++ b/R-package/man/xgb.save.Rd @@ -15,7 +15,7 @@ Save xgboost model from xgboost or xgb.train } \examples{ data(iris) -bst <- xgboost(as.matrix(iris[,1:4]),as.numeric(iris[,5]), nrounds = 2) +bst <- xgboost(as.matrix(iris[,1:4]),as.numeric(iris[,5]=='setosa'), nrounds = 2) xgb.save(bst, 'iris.xgb.model') bst <- xgb.load('iris.xgb.model') pred <- predict(bst, as.matrix(iris[,1:4])) diff --git a/R-package/man/xgb.train.Rd b/R-package/man/xgb.train.Rd index 4da3b0013..75c43cd56 100644 --- a/R-package/man/xgb.train.Rd +++ b/R-package/man/xgb.train.Rd @@ -56,7 +56,7 @@ therefore it is more flexible than \code{\link{xgboost}}. 
} \examples{ data(iris) -iris[,5] <- as.numeric(iris[,5]) +iris[,5] <- as.numeric(iris[,5]=='setosa') dtrain <- xgb.DMatrix(as.matrix(iris[,1:4]), label=iris[,5]) dtest <- dtrain watchlist <- list(eval = dtest, train = dtrain) diff --git a/R-package/man/xgboost.Rd b/R-package/man/xgboost.Rd index 2b6c1a124..435423d28 100644 --- a/R-package/man/xgboost.Rd +++ b/R-package/man/xgboost.Rd @@ -46,7 +46,7 @@ Number of threads can also be manually specified via "nthread" parameter } \examples{ data(iris) -bst <- xgboost(as.matrix(iris[,1:4]),as.numeric(iris[,5]), nrounds = 2) +bst <- xgboost(as.matrix(iris[,1:4]),as.numeric(iris[,5]=='setosa'), nrounds = 2) pred <- predict(bst, as.matrix(iris[,1:4])) } diff --git a/R-package/vignettes/xgboost.Rnw b/R-package/vignettes/xgboost.Rnw index 9ecceca17..45ab1a096 100644 --- a/R-package/vignettes/xgboost.Rnw +++ b/R-package/vignettes/xgboost.Rnw @@ -80,7 +80,7 @@ In this section, we will illustrate some common usage of \verb@xgboost@. <>= library(xgboost) data(iris) -bst <- xgboost(as.matrix(iris[,1:4]),as.numeric(iris[,5]), +bst <- xgboost(as.matrix(iris[,1:4]),as.numeric(iris[,5]=='setosa'), nrounds = 5) xgb.save(bst, 'model.save') bst = xgb.load('model.save') @@ -121,7 +121,7 @@ training from initial prediction value, weighted training instance. 
We can use \verb@xgb.DMatrix@ to construct an \verb@xgb.DMatrix@ object: <>= iris.mat <- as.matrix(iris[,1:4]) -iris.label <- as.numeric(iris[,5]) +iris.label <- as.numeric(iris[,5]=='setosa') diris <- xgb.DMatrix(iris.mat, label = iris.label) class(diris) getinfo(diris,'label') diff --git a/demo/guide-R/basic_walkthrough.R b/demo/guide-R/basic_walkthrough.R new file mode 100644 index 000000000..959e5f0ed --- /dev/null +++ b/demo/guide-R/basic_walkthrough.R @@ -0,0 +1,53 @@ +require(xgboost) + +dtrain <- xgb.DMatrix('../data/agaricus.txt.train') +dtest <- xgb.DMatrix('../data/agaricus.txt.test') +param <- list(max_depth=2,eta=1,silent=1,objective='binary:logistic') +watchlist <- list(eval = dtest, train = dtrain) +num_round <- 2 +bst <- xgb.train(param, dtrain, num_round, watchlist) +preds <- predict(bst, dtest) +labels <- getinfo(dtest,'label') +cat('error=', mean(as.numeric(preds>0.5)!=labels),'\n') +xgb.save(bst, 'xgb.model') +xgb.dump(bst, 'dump.raw.txt') +xgb.dump(bst, 'dump.nice.txt','../data/featmap.txt') + +bst2 <- xgb.load('xgb.model') +preds2 <- predict(bst2,dtest) +stopifnot(sum((preds-preds2)^2)==0) + + +cat('start running example of build DMatrix from a sparse Matrix\n') +read.libsvm <- function(fname, maxcol) { + content <- readLines(fname) + nline <- length(content) + label <- numeric(nline) + mat <- matrix(0, nline, maxcol + 1) + for (i in 1:nline) { + arr <- as.vector(strsplit(content[i], " ")[[1]]) + label[i] <- as.numeric(arr[[1]]) + for (j in 2:length(arr)) { + kv <- strsplit(arr[j], ":")[[1]] + # to avoid 0 index + findex <- as.integer(kv[1]) + 1 + fvalue <- as.numeric(kv[2]) + mat[i, findex] <- fvalue + } + } + mat <- as(mat, "sparseMatrix") + return(list(label = label, data = mat)) +} +csc <- read.libsvm("../data/agaricus.txt.train", 126) +y <- csc$label +x <- csc$data +class(x) +dtrain <- xgb.DMatrix(x, label = y) +bst <- xgb.train(param, dtrain, num_round, watchlist) + +cat('start running example of build DMatrix from a dense
array\n') +x <- as.matrix(x) +class(x) +dtrain <- xgb.DMatrix(x, label = y) +bst <- xgb.train(param, dtrain, num_round, watchlist) +