From d776e0fdf54b5c86d8ac2547e09f54423d92fd34 Mon Sep 17 00:00:00 2001 From: hetong Date: Fri, 5 Sep 2014 19:22:27 -0700 Subject: [PATCH 1/5] fix iris multiclass problem --- R-package/R/getinfo.xgb.DMatrix.R | 2 +- R-package/R/predict.xgb.Booster.R | 7 ++++--- R-package/R/slice.xgb.DMatrix.R | 2 +- R-package/R/xgb.DMatrix.R | 2 +- R-package/R/xgb.DMatrix.save.R | 2 +- R-package/R/xgb.dump.R | 2 +- R-package/R/xgb.load.R | 2 +- R-package/R/xgb.save.R | 2 +- R-package/R/xgb.train.R | 2 +- R-package/R/xgboost.R | 2 +- R-package/vignettes/xgboost.Rnw | 4 ++-- 11 files changed, 15 insertions(+), 14 deletions(-) diff --git a/R-package/R/getinfo.xgb.DMatrix.R b/R-package/R/getinfo.xgb.DMatrix.R index 3a79fd2fb..2a7ae8e5e 100644 --- a/R-package/R/getinfo.xgb.DMatrix.R +++ b/R-package/R/getinfo.xgb.DMatrix.R @@ -6,7 +6,7 @@ setClass('xgb.DMatrix') #' #' @examples #' data(iris) -#' iris[,5] <- as.numeric(iris[,5]) +#' iris[,5] <- as.numeric(iris[,5]=='setosa') #' dtrain <- xgb.DMatrix(as.matrix(iris[,1:4]), label=iris[,5]) #' labels <- getinfo(dtrain, "label") #' @rdname getinfo diff --git a/R-package/R/predict.xgb.Booster.R b/R-package/R/predict.xgb.Booster.R index 390ac689e..a41b26873 100644 --- a/R-package/R/predict.xgb.Booster.R +++ b/R-package/R/predict.xgb.Booster.R @@ -11,11 +11,12 @@ setClass("xgb.Booster") #' value of sum of functions, when outputmargin=TRUE, the prediction is #' untransformed margin value. In logistic regression, outputmargin=T will #' output value before logistic transformation. -#' @param ntreelimit limit number of trees used in prediction, this parameter is only valid for gbtree, but not for gblinear. -#' set it to be value bigger than 0. It will use all trees by default. +#' @param ntreelimit limit number of trees used in prediction, this parameter is +#' only valid for gbtree, but not for gblinear. set it to be value bigger +#' than 0. It will use all trees by default. 
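+#'
+#'        For instance (a hypothetical call, assuming a gbtree booster
+#'        \code{bst} and a feature matrix \code{x} already exist),
+#'        \code{predict(bst, x, ntreelimit = 1)} would restrict the
+#'        prediction to the first tree only.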
#' @examples #' data(iris) -#' bst <- xgboost(as.matrix(iris[,1:4]),as.numeric(iris[,5]), nrounds = 2) +#' bst <- xgboost(as.matrix(iris[,1:4]),as.numeric(iris[,5]=='setosa'), nrounds = 2) #' pred <- predict(bst, as.matrix(iris[,1:4])) #' @export #' diff --git a/R-package/R/slice.xgb.DMatrix.R b/R-package/R/slice.xgb.DMatrix.R index 8a93efc4d..72f94893a 100644 --- a/R-package/R/slice.xgb.DMatrix.R +++ b/R-package/R/slice.xgb.DMatrix.R @@ -8,7 +8,7 @@ setClass('xgb.DMatrix') #' #' @examples #' data(iris) -#' iris[,5] <- as.numeric(iris[,5]) +#' iris[,5] <- as.numeric(iris[,5]=='setosa') #' dtrain <- xgb.DMatrix(as.matrix(iris[,1:4]), label=iris[,5]) #' dsub <- slice(dtrain, 1:3) #' @rdname slice diff --git a/R-package/R/xgb.DMatrix.R b/R-package/R/xgb.DMatrix.R index d52847ef2..3b320d73f 100644 --- a/R-package/R/xgb.DMatrix.R +++ b/R-package/R/xgb.DMatrix.R @@ -12,7 +12,7 @@ #' #' @examples #' data(iris) -#' iris[,5] <- as.numeric(iris[,5]) +#' iris[,5] <- as.numeric(iris[,5]=='setosa') #' dtrain <- xgb.DMatrix(as.matrix(iris[,1:4]), label=iris[,5]) #' xgb.DMatrix.save(dtrain, 'iris.xgb.DMatrix') #' dtrain <- xgb.DMatrix('iris.xgb.DMatrix') diff --git a/R-package/R/xgb.DMatrix.save.R b/R-package/R/xgb.DMatrix.save.R index 4fcb71301..4f4f49399 100644 --- a/R-package/R/xgb.DMatrix.save.R +++ b/R-package/R/xgb.DMatrix.save.R @@ -7,7 +7,7 @@ #' #' @examples #' data(iris) -#' iris[,5] <- as.numeric(iris[,5]) +#' iris[,5] <- as.numeric(iris[,5]=='setosa') #' dtrain <- xgb.DMatrix(as.matrix(iris[,1:4]), label=iris[,5]) #' xgb.DMatrix.save(dtrain, 'iris.xgb.DMatrix') #' dtrain <- xgb.DMatrix('iris.xgb.DMatrix') diff --git a/R-package/R/xgb.dump.R b/R-package/R/xgb.dump.R index 09406dc99..78fcf4d0b 100644 --- a/R-package/R/xgb.dump.R +++ b/R-package/R/xgb.dump.R @@ -13,7 +13,7 @@ #' #' @examples #' data(iris) -#' bst <- xgboost(as.matrix(iris[,1:4]),as.numeric(iris[,5]), nrounds = 2) +#' bst <- xgboost(as.matrix(iris[,1:4]),as.numeric(iris[,5]=='setosa'), nrounds = 2) #' xgb.dump(bst, 'iris.xgb.model.dump') #' @export #' diff --git a/R-package/R/xgb.load.R b/R-package/R/xgb.load.R index 626c08d0d..54afe65dd 100644 --- a/R-package/R/xgb.load.R +++ b/R-package/R/xgb.load.R @@ -6,7 +6,7 @@ #' #' @examples #' data(iris) -#' bst <- xgboost(as.matrix(iris[,1:4]),as.numeric(iris[,5]), nrounds = 2) +#' bst <- xgboost(as.matrix(iris[,1:4]),as.numeric(iris[,5]=='setosa'), nrounds = 2) #' xgb.save(bst, 'iris.xgb.model') #' bst <- xgb.load('iris.xgb.model') #' pred <- predict(bst, as.matrix(iris[,1:4])) diff --git a/R-package/R/xgb.save.R b/R-package/R/xgb.save.R index 64add0ca9..c211429ad 100644 --- a/R-package/R/xgb.save.R +++ b/R-package/R/xgb.save.R @@ -7,7 +7,7 @@ #' #' @examples #' data(iris) -#' bst <- xgboost(as.matrix(iris[,1:4]),as.numeric(iris[,5]), nrounds = 2) +#' bst <- xgboost(as.matrix(iris[,1:4]),as.numeric(iris[,5]=='setosa'), nrounds = 2) #' xgb.save(bst, 'iris.xgb.model') #' bst <- xgb.load('iris.xgb.model') #' pred <- predict(bst, as.matrix(iris[,1:4])) diff --git a/R-package/R/xgb.train.R b/R-package/R/xgb.train.R index 58a575d03..e5400829f 100644 --- a/R-package/R/xgb.train.R +++ b/R-package/R/xgb.train.R @@ -44,7 +44,7 @@ #' #' @examples #' data(iris) -#' iris[,5] <- as.numeric(iris[,5]) +#' iris[,5] <- as.numeric(iris[,5]=='setosa') #' dtrain <- xgb.DMatrix(as.matrix(iris[,1:4]), label=iris[,5]) #' dtest <- dtrain #' watchlist <- list(eval = dtest, train = dtrain) diff --git a/R-package/R/xgboost.R b/R-package/R/xgboost.R index 6f4633fb8..dc8b17fa0 100644 --- a/R-package/R/xgboost.R +++ 
b/R-package/R/xgboost.R @@ -34,7 +34,7 @@ #' #' @examples #' data(iris) -#' bst <- xgboost(as.matrix(iris[,1:4]),as.numeric(iris[,5]), nrounds = 2) +#' bst <- xgboost(as.matrix(iris[,1:4]),as.numeric(iris[,5]=='setosa'), nrounds = 2) #' pred <- predict(bst, as.matrix(iris[,1:4])) #' @export #' diff --git a/R-package/vignettes/xgboost.Rnw b/R-package/vignettes/xgboost.Rnw index 9ecceca17..45ab1a096 100644 --- a/R-package/vignettes/xgboost.Rnw +++ b/R-package/vignettes/xgboost.Rnw @@ -80,7 +80,7 @@ In this section, we will illustrate some common usage of \verb@xgboost@. <>= library(xgboost) data(iris) -bst <- xgboost(as.matrix(iris[,1:4]),as.numeric(iris[,5]), +bst <- xgboost(as.matrix(iris[,1:4]),as.numeric(iris[,5]=='setosa'), nrounds = 5) xgb.save(bst, 'model.save') bst = xgb.load('model.save') @@ -121,7 +121,7 @@ training from initial prediction value, weighted training instance. We can use \verb@xgb.DMatrix@ to construct an \verb@xgb.DMatrix@ object: <>= iris.mat <- as.matrix(iris[,1:4]) -iris.label <- as.numeric(iris[,5]) +iris.label <- as.numeric(iris[,5]=='setosa') diris <- xgb.DMatrix(iris.mat, label = iris.label) class(diris) getinfo(diris,'label') From 801a17fa02cf4357caf292724b876f0c8807e7dc Mon Sep 17 00:00:00 2001 From: hetong Date: Fri, 5 Sep 2014 19:47:58 -0700 Subject: [PATCH 2/5] fix iris to Rd files --- R-package/man/getinfo.Rd | 2 +- R-package/man/predict-xgb.Booster-method.Rd | 7 +- R-package/man/slice.Rd | 2 +- R-package/man/xgb.DMatrix.Rd | 2 +- R-package/man/xgb.DMatrix.save.Rd | 2 +- R-package/man/xgb.dump.Rd | 2 +- R-package/man/xgb.load.Rd | 2 +- R-package/man/xgb.save.Rd | 2 +- R-package/man/xgb.train.Rd | 2 +- R-package/man/xgboost.Rd | 2 +- R-package/vignettes/xgboost.aux | 28 ++ R-package/vignettes/xgboost.bbl | 24 ++ R-package/vignettes/xgboost.blg | 47 +++ R-package/vignettes/xgboost.out | 4 + R-package/vignettes/xgboost.tex | 319 ++++++++++++++++++++ 15 files changed, 435 insertions(+), 12 deletions(-) create mode 100644 R-package/vignettes/xgboost.aux create mode 100644 R-package/vignettes/xgboost.bbl create mode 100644 R-package/vignettes/xgboost.blg create mode 100644 R-package/vignettes/xgboost.out create mode 100644 R-package/vignettes/xgboost.tex diff --git a/R-package/man/getinfo.Rd b/R-package/man/getinfo.Rd index 05a25c152..7206d6b17 100644 --- a/R-package/man/getinfo.Rd +++ b/R-package/man/getinfo.Rd @@ -21,7 +21,7 @@ Get information of an xgb.DMatrix object } \examples{ data(iris) -iris[,5] <- as.numeric(iris[,5]) +iris[,5] <- as.numeric(iris[,5]=='setosa') dtrain <- xgb.DMatrix(as.matrix(iris[,1:4]), label=iris[,5]) labels <- getinfo(dtrain, "label") } diff --git a/R-package/man/predict-xgb.Booster-method.Rd b/R-package/man/predict-xgb.Booster-method.Rd index d192997d2..9c19b8f33 100644 --- a/R-package/man/predict-xgb.Booster-method.Rd +++ b/R-package/man/predict-xgb.Booster-method.Rd @@ -18,15 +18,16 @@ value of sum of functions, when outputmargin=TRUE, the prediction is untransformed margin value. In logistic regression, outputmargin=T will output value before logistic transformation.} -\item{ntreelimit}{limit number of trees used in prediction, this parameter is only valid for gbtree, but not for gblinear. -set it to be value bigger than 0. It will use all trees by default.} +\item{ntreelimit}{limit number of trees used in prediction, this parameter is +only valid for gbtree, but not for gblinear. set it to be value bigger +than 0. It will use all trees by default.} } \description{ Predicted values based on xgboost model object. 
} \examples{ data(iris) -bst <- xgboost(as.matrix(iris[,1:4]),as.numeric(iris[,5]), nrounds = 2) +bst <- xgboost(as.matrix(iris[,1:4]),as.numeric(iris[,5]=='setosa'), nrounds = 2) pred <- predict(bst, as.matrix(iris[,1:4])) } diff --git a/R-package/man/slice.Rd b/R-package/man/slice.Rd index 7acb14a32..a4d0a4568 100644 --- a/R-package/man/slice.Rd +++ b/R-package/man/slice.Rd @@ -23,7 +23,7 @@ orginal xgb.DMatrix object } \examples{ data(iris) -iris[,5] <- as.numeric(iris[,5]) +iris[,5] <- as.numeric(iris[,5]=='setosa') dtrain <- xgb.DMatrix(as.matrix(iris[,1:4]), label=iris[,5]) dsub <- slice(dtrain, 1:3) } diff --git a/R-package/man/xgb.DMatrix.Rd b/R-package/man/xgb.DMatrix.Rd index 166d69f68..ea7ff8ce6 100644 --- a/R-package/man/xgb.DMatrix.Rd +++ b/R-package/man/xgb.DMatrix.Rd @@ -20,7 +20,7 @@ Contruct xgb.DMatrix object from dense matrix, sparse matrix or local file. } \examples{ data(iris) -iris[,5] <- as.numeric(iris[,5]) +iris[,5] <- as.numeric(iris[,5]=='setosa') dtrain <- xgb.DMatrix(as.matrix(iris[,1:4]), label=iris[,5]) xgb.DMatrix.save(dtrain, 'iris.xgb.DMatrix') dtrain <- xgb.DMatrix('iris.xgb.DMatrix') diff --git a/R-package/man/xgb.DMatrix.save.Rd b/R-package/man/xgb.DMatrix.save.Rd index e5e70501d..2692069dc 100644 --- a/R-package/man/xgb.DMatrix.save.Rd +++ b/R-package/man/xgb.DMatrix.save.Rd @@ -15,7 +15,7 @@ Save xgb.DMatrix object to binary file } \examples{ data(iris) -iris[,5] <- as.numeric(iris[,5]) +iris[,5] <- as.numeric(iris[,5]=='setosa') dtrain <- xgb.DMatrix(as.matrix(iris[,1:4]), label=iris[,5]) xgb.DMatrix.save(dtrain, 'iris.xgb.DMatrix') dtrain <- xgb.DMatrix('iris.xgb.DMatrix') diff --git a/R-package/man/xgb.dump.Rd b/R-package/man/xgb.dump.Rd index 4d6933811..a4ac12cd4 100644 --- a/R-package/man/xgb.dump.Rd +++ b/R-package/man/xgb.dump.Rd @@ -21,7 +21,7 @@ Save a xgboost model to text file. Could be parsed later. } \examples{ data(iris) -bst <- xgboost(as.matrix(iris[,1:4]),as.numeric(iris[,5]), nrounds = 2) +bst <- xgboost(as.matrix(iris[,1:4]),as.numeric(iris[,5]=='setosa'), nrounds = 2) xgb.dump(bst, 'iris.xgb.model.dump') } diff --git a/R-package/man/xgb.load.Rd b/R-package/man/xgb.load.Rd index 980daf88d..a8969c07d 100644 --- a/R-package/man/xgb.load.Rd +++ b/R-package/man/xgb.load.Rd @@ -13,7 +13,7 @@ Load xgboost model from the binary model file } \examples{ data(iris) -bst <- xgboost(as.matrix(iris[,1:4]),as.numeric(iris[,5]), nrounds = 2) +bst <- xgboost(as.matrix(iris[,1:4]),as.numeric(iris[,5]=='setosa'), nrounds = 2) xgb.save(bst, 'iris.xgb.model') bst <- xgb.load('iris.xgb.model') pred <- predict(bst, as.matrix(iris[,1:4])) diff --git a/R-package/man/xgb.save.Rd b/R-package/man/xgb.save.Rd index ba390d1b4..0dca58287 100644 --- a/R-package/man/xgb.save.Rd +++ b/R-package/man/xgb.save.Rd @@ -15,7 +15,7 @@ Save xgboost model from xgboost or xgb.train } \examples{ data(iris) -bst <- xgboost(as.matrix(iris[,1:4]),as.numeric(iris[,5]), nrounds = 2) +bst <- xgboost(as.matrix(iris[,1:4]),as.numeric(iris[,5]=='setosa'), nrounds = 2) xgb.save(bst, 'iris.xgb.model') bst <- xgb.load('iris.xgb.model') pred <- predict(bst, as.matrix(iris[,1:4])) diff --git a/R-package/man/xgb.train.Rd b/R-package/man/xgb.train.Rd index 4da3b0013..75c43cd56 100644 --- a/R-package/man/xgb.train.Rd +++ b/R-package/man/xgb.train.Rd @@ -56,7 +56,7 @@ therefore it is more flexible than \code{\link{xgboost}}. 
} \examples{ data(iris) -iris[,5] <- as.numeric(iris[,5]) +iris[,5] <- as.numeric(iris[,5]=='setosa') dtrain <- xgb.DMatrix(as.matrix(iris[,1:4]), label=iris[,5]) dtest <- dtrain watchlist <- list(eval = dtest, train = dtrain) diff --git a/R-package/man/xgboost.Rd b/R-package/man/xgboost.Rd index 2b6c1a124..435423d28 100644 --- a/R-package/man/xgboost.Rd +++ b/R-package/man/xgboost.Rd @@ -46,7 +46,7 @@ Number of threads can also be manually specified via "nthread" parameter } \examples{ data(iris) -bst <- xgboost(as.matrix(iris[,1:4]),as.numeric(iris[,5]), nrounds = 2) +bst <- xgboost(as.matrix(iris[,1:4]),as.numeric(iris[,5]=='setosa'), nrounds = 2) pred <- predict(bst, as.matrix(iris[,1:4])) } diff --git a/R-package/vignettes/xgboost.aux b/R-package/vignettes/xgboost.aux new file mode 100644 index 000000000..6e6babc4c --- /dev/null +++ b/R-package/vignettes/xgboost.aux @@ -0,0 +1,28 @@ +\relax +\providecommand\hyper@newdestlabel[2]{} +\providecommand\HyperFirstAtBeginDocument{\AtBeginDocument} +\HyperFirstAtBeginDocument{\ifx\hyper@anchor\@undefined +\global\let\oldcontentsline\contentsline +\gdef\contentsline#1#2#3#4{\oldcontentsline{#1}{#2}{#3}} +\global\let\oldnewlabel\newlabel +\gdef\newlabel#1#2{\newlabelxx{#1}#2} +\gdef\newlabelxx#1#2#3#4#5#6{\oldnewlabel{#1}{{#2}{#3}}} +\AtEndDocument{\ifx\hyper@anchor\@undefined +\let\contentsline\oldcontentsline +\let\newlabel\oldnewlabel +\fi} +\fi} +\global\let\hyper@last\relax +\gdef\HyperFirstAtBeginDocument#1{#1} +\providecommand\HyField@AuxAddToFields[1]{} +\providecommand\HyField@AuxAddToCoFields[2]{} +\citation{friedman2001greedy} +\@writefile{toc}{\contentsline {section}{\numberline {1}Introduction}{1}{section.1}} +\@writefile{toc}{\contentsline {section}{\numberline {2}Example with iris}{1}{section.2}} +\@writefile{toc}{\contentsline {section}{\numberline {3}Advanced Examples}{2}{section.3}} +\bibstyle{jss} +\citation{*} +\bibdata{xgboost} +\bibcite{friedman2000additive}{{1}{2000}{{Friedman \emph {et~al.}}}{{Friedman, Hastie, Tibshirani \emph {et~al.}}}} +\bibcite{friedman2001greedy}{{2}{2001}{{Friedman}}{{}}} +\@writefile{toc}{\contentsline {section}{\numberline {4}The Higgs Boson competition}{3}{section.4}} diff --git a/R-package/vignettes/xgboost.bbl b/R-package/vignettes/xgboost.bbl new file mode 100644 index 000000000..fdf58e763 --- /dev/null +++ b/R-package/vignettes/xgboost.bbl @@ -0,0 +1,24 @@ +\begin{thebibliography}{2} +\newcommand{\enquote}[1]{``#1''} +\providecommand{\natexlab}[1]{#1} +\providecommand{\url}[1]{\texttt{#1}} +\providecommand{\urlprefix}{URL } +\expandafter\ifx\csname urlstyle\endcsname\relax + \providecommand{\doi}[1]{doi:\discretionary{}{}{}#1}\else + \providecommand{\doi}{doi:\discretionary{}{}{}\begingroup + \urlstyle{rm}\Url}\fi +\providecommand{\eprint}[2][]{\url{#2}} + +\bibitem[{Friedman \emph{et~al.}(2000)Friedman, Hastie, Tibshirani + \emph{et~al.}}]{friedman2000additive} +Friedman J, Hastie T, Tibshirani R, \emph{et~al.} (2000). +\newblock \enquote{Additive logistic regression: a statistical view of boosting + (with discussion and a rejoinder by the authors).} +\newblock \emph{The annals of statistics}, \textbf{28}(2), 337--407. + +\bibitem[{Friedman(2001)}]{friedman2001greedy} +Friedman JH (2001). +\newblock \enquote{Greedy function approximation: a gradient boosting machine.} +\newblock \emph{Annals of Statistics}, pp. 1189--1232. 
+ +\end{thebibliography} diff --git a/R-package/vignettes/xgboost.blg b/R-package/vignettes/xgboost.blg new file mode 100644 index 000000000..2c0e87387 --- /dev/null +++ b/R-package/vignettes/xgboost.blg @@ -0,0 +1,47 @@ +This is BibTeX, Version 0.99d (TeX Live 2013/Debian) +Capacity: max_strings=35307, hash_size=35307, hash_prime=30011 +The top-level auxiliary file: xgboost.aux +The style file: jss.bst +Database file #1: xgboost.bib +Reallocated wiz_functions (elt_size=4) to 6000 items from 3000. +You've used 2 entries, + 3140 wiz_defined-function locations, + 641 strings with 5430 characters, +and the built_in function-call counts, 1920 in all, are: += -- 162 +> -- 44 +< -- 2 ++ -- 17 +- -- 15 +* -- 149 +:= -- 256 +add.period$ -- 8 +call.type$ -- 2 +change.case$ -- 12 +chr.to.int$ -- 2 +cite$ -- 2 +duplicate$ -- 171 +empty$ -- 175 +format.name$ -- 19 +if$ -- 395 +int.to.chr$ -- 1 +int.to.str$ -- 1 +missing$ -- 24 +newline$ -- 21 +num.names$ -- 8 +pop$ -- 51 +preamble$ -- 1 +purify$ -- 12 +quote$ -- 0 +skip$ -- 53 +stack$ -- 0 +substring$ -- 181 +swap$ -- 65 +text.length$ -- 1 +text.prefix$ -- 0 +top$ -- 0 +type$ -- 18 +warning$ -- 0 +while$ -- 16 +width$ -- 0 +write$ -- 36 diff --git a/R-package/vignettes/xgboost.out b/R-package/vignettes/xgboost.out new file mode 100644 index 000000000..6d60796a3 --- /dev/null +++ b/R-package/vignettes/xgboost.out @@ -0,0 +1,4 @@ +\BOOKMARK [1][-]{section.1}{Introduction}{}% 1 +\BOOKMARK [1][-]{section.2}{Example with iris}{}% 2 +\BOOKMARK [1][-]{section.3}{Advanced Examples}{}% 3 +\BOOKMARK [1][-]{section.4}{The Higgs Boson competition}{}% 4 diff --git a/R-package/vignettes/xgboost.tex b/R-package/vignettes/xgboost.tex new file mode 100644 index 000000000..0ed4015b7 --- /dev/null +++ b/R-package/vignettes/xgboost.tex @@ -0,0 +1,319 @@ +\documentclass{article}\usepackage[]{graphicx}\usepackage[]{color} +%% maxwidth is the original width if it is less than linewidth +%% otherwise use linewidth (to make sure the graphics do not exceed the margin) +\makeatletter +\def\maxwidth{ % + \ifdim\Gin@nat@width>\linewidth + \linewidth + \else + \Gin@nat@width + \fi +} +\makeatother + +\definecolor{fgcolor}{rgb}{0.345, 0.345, 0.345} +\newcommand{\hlnum}[1]{\textcolor[rgb]{0.686,0.059,0.569}{#1}}% +\newcommand{\hlstr}[1]{\textcolor[rgb]{0.192,0.494,0.8}{#1}}% +\newcommand{\hlcom}[1]{\textcolor[rgb]{0.678,0.584,0.686}{\textit{#1}}}% +\newcommand{\hlopt}[1]{\textcolor[rgb]{0,0,0}{#1}}% +\newcommand{\hlstd}[1]{\textcolor[rgb]{0.345,0.345,0.345}{#1}}% +\newcommand{\hlkwa}[1]{\textcolor[rgb]{0.161,0.373,0.58}{\textbf{#1}}}% +\newcommand{\hlkwb}[1]{\textcolor[rgb]{0.69,0.353,0.396}{#1}}% +\newcommand{\hlkwc}[1]{\textcolor[rgb]{0.333,0.667,0.333}{#1}}% +\newcommand{\hlkwd}[1]{\textcolor[rgb]{0.737,0.353,0.396}{\textbf{#1}}}% + +\usepackage{framed} +\makeatletter +\newenvironment{kframe}{% + \def\at@end@of@kframe{}% + \ifinner\ifhmode% + \def\at@end@of@kframe{\end{minipage}}% + \begin{minipage}{\columnwidth}% + \fi\fi% + \def\FrameCommand##1{\hskip\@totalleftmargin \hskip-\fboxsep + \colorbox{shadecolor}{##1}\hskip-\fboxsep + % There is no \\@totalrightmargin, so: + \hskip-\linewidth \hskip-\@totalleftmargin \hskip\columnwidth}% + \MakeFramed {\advance\hsize-\width + \@totalleftmargin\z@ \linewidth\hsize + \@setminipage}}% + {\par\unskip\endMakeFramed% + \at@end@of@kframe} +\makeatother + +\definecolor{shadecolor}{rgb}{.97, .97, .97} +\definecolor{messagecolor}{rgb}{0, 0, 0} +\definecolor{warningcolor}{rgb}{1, 0, 1} +\definecolor{errorcolor}{rgb}{1, 0, 0} 
+\newenvironment{knitrout}{}{} % an empty environment to be redefined in TeX + +\usepackage{alltt} +\RequirePackage{url} +\usepackage{hyperref} +\RequirePackage{amsmath} +\RequirePackage{natbib} +\RequirePackage[a4paper,lmargin={1.25in},rmargin={1.25in},tmargin={1in},bmargin={1in}]{geometry} + +\makeatletter +% \VignetteIndexEntry{xgboost: eXtreme Gradient Boosting} +%\VignetteKeywords{xgboost, gbm, gradient boosting machines} +%\VignettePackage{xgboost} +% \VignetteEngine{knitr::knitr} +\makeatother +\IfFileExists{upquote.sty}{\usepackage{upquote}}{} +\begin{document} +%\SweaveOpts{concordance=TRUE} + + + +% + +% + + \begin{center} + \vspace*{6\baselineskip} + \rule{\textwidth}{1.6pt}\vspace*{-\baselineskip}\vspace*{2pt} + \rule{\textwidth}{0.4pt}\\[2\baselineskip] + {\LARGE \textbf{xgboost: eXtreme Gradient Boosting}}\\[1.2\baselineskip] + \rule{\textwidth}{0.4pt}\vspace*{-\baselineskip}\vspace{3.2pt} + \rule{\textwidth}{1.6pt}\\[2\baselineskip] + {\Large Tianqi Chen, Tong He}\\[\baselineskip] + {\large Package Version: 0.3-0}\\[\baselineskip] + {\large \today}\par + \vfill + \end{center} + +\thispagestyle{empty} + +\clearpage + +\setcounter{page}{1} + +\section{Introduction} + +This is an introductory document of using the \verb@xgboost@ package in R. + +\verb@xgboost@ is short for eXtreme Gradient Boosting package. It is an efficient + and scalable implementation of gradient boosting framework by \citep{friedman2001greedy}. +The package includes efficient linear model solver and tree learning algorithm. +It supports various objective functions, including regression, classification +and ranking. The package is made to be extendible, so that users are also allowed to define their own objectives easily. It has several features: +\begin{enumerate} + \item{Speed: }{\verb@xgboost@ can automatically do parallel computation on + Windows and Linux, with openmp. It is generally over 10 times faster than + \verb@gbm@.} + \item{Input Type: }{\verb@xgboost@ takes several types of input data:} + \begin{itemize} + \item{Dense Matrix: }{R's dense matrix, i.e. \verb@matrix@} + \item{Sparse Matrix: }{R's sparse matrix \verb@Matrix::dgCMatrix@} + \item{Data File: }{Local data files} + \item{xgb.DMatrix: }{\verb@xgboost@'s own class. Recommended.} + \end{itemize} + \item{Sparsity: }{\verb@xgboost@ accepts sparse input for both tree booster + and linear booster, and is optimized for sparse input.} + \item{Customization: }{\verb@xgboost@ supports customized objective function + and evaluation function} + \item{Performance: }{\verb@xgboost@ has better performance on several different + datasets.} +\end{enumerate} + + +\section{Example with iris} + +In this section, we will illustrate some common usage of \verb@xgboost@. 
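+
+Note that \verb@iris@ contains three species, while these walkthroughs train
+with the default objective. We therefore first collapse the label into a
+binary indicator for \verb@setosa@; that conversion is the fix this patch
+series applies to every example. A multiclass variant is sketched below
+(not run; it assumes the \verb@multi:softmax@ objective and a
+\verb@num_class@ parameter are reachable from the R interface, with
+zero-based class labels):
+
+\begin{verbatim}
+# hypothetical multiclass setup: labels must lie in 0..(num_class-1)
+labels <- as.numeric(iris[, 5]) - 1
+bst.mc <- xgboost(as.matrix(iris[, 1:4]), labels, nrounds = 5,
+                  params = list(objective = "multi:softmax", num_class = 3))
+\end{verbatim}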
+ +\begin{knitrout} +\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} +\begin{alltt} +\hlkwd{library}\hlstd{(xgboost)} +\hlkwd{data}\hlstd{(iris)} +\hlstd{bst} \hlkwb{<-} \hlkwd{xgboost}\hlstd{(}\hlkwd{as.matrix}\hlstd{(iris[,}\hlnum{1}\hlopt{:}\hlnum{4}\hlstd{]),}\hlkwd{as.numeric}\hlstd{(iris[,}\hlnum{5}\hlstd{]}\hlopt{==}\hlstr{'setosa'}\hlstd{),} + \hlkwc{nrounds} \hlstd{=} \hlnum{5}\hlstd{)} +\end{alltt} +\begin{verbatim} +## [0] train-rmse:0.351971 +## [1] train-rmse:0.247769 +## [2] train-rmse:0.174418 +## [3] train-rmse:0.122783 +## [4] train-rmse:0.086435 +\end{verbatim} +\begin{alltt} +\hlkwd{xgb.save}\hlstd{(bst,} \hlstr{'model.save'}\hlstd{)} +\end{alltt} +\begin{verbatim} +## [1] TRUE +\end{verbatim} +\begin{alltt} +\hlstd{bst} \hlkwb{=} \hlkwd{xgb.load}\hlstd{(}\hlstr{'model.save'}\hlstd{)} +\hlstd{pred} \hlkwb{<-} \hlkwd{predict}\hlstd{(bst,} \hlkwd{as.matrix}\hlstd{(iris[,}\hlnum{1}\hlopt{:}\hlnum{4}\hlstd{]))} +\end{alltt} +\end{kframe} +\end{knitrout} + +\verb@xgboost@ is the main function to train a \verb@Booster@, i.e. a model. +\verb@predict@ does prediction on the model. + +Here we can save the model to a binary local file, and load it when needed. +We can't inspect the trees inside. However we have another function to save the +model in plain text. +\begin{knitrout} +\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} +\begin{alltt} +\hlkwd{xgb.dump}\hlstd{(bst,} \hlstr{'model.dump'}\hlstd{)} +\end{alltt} +\begin{verbatim} +## [1] TRUE +\end{verbatim} +\end{kframe} +\end{knitrout} + +The output looks like + +\begin{verbatim} +booster[0]: +0:[f2<2.45] yes=1,no=2,missing=1 + 1:leaf=0.147059 + 2:[f3<1.65] yes=3,no=4,missing=3 + 3:leaf=0.464151 + 4:leaf=0.722449 +booster[1]: +0:[f2<2.45] yes=1,no=2,missing=1 + 1:leaf=0.103806 + 2:[f2<4.85] yes=3,no=4,missing=3 + 3:leaf=0.316341 + 4:leaf=0.510365 +\end{verbatim} + +It is important to know \verb@xgboost@'s own data type: \verb@xgb.DMatrix@. +It speeds up \verb@xgboost@, and is needed for advanced features such as +training from initial prediction value, weighted training instance. + +We can use \verb@xgb.DMatrix@ to construct an \verb@xgb.DMatrix@ object: +\begin{knitrout} +\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} +\begin{alltt} +\hlstd{iris.mat} \hlkwb{<-} \hlkwd{as.matrix}\hlstd{(iris[,}\hlnum{1}\hlopt{:}\hlnum{4}\hlstd{])} +\hlstd{iris.label} \hlkwb{<-} \hlkwd{as.numeric}\hlstd{(iris[,}\hlnum{5}\hlstd{]}\hlopt{==}\hlstr{'setosa'}\hlstd{)} +\hlstd{diris} \hlkwb{<-} \hlkwd{xgb.DMatrix}\hlstd{(iris.mat,} \hlkwc{label} \hlstd{= iris.label)} +\hlkwd{class}\hlstd{(diris)} +\end{alltt} +\begin{verbatim} +## [1] "xgb.DMatrix" +\end{verbatim} +\begin{alltt} +\hlkwd{getinfo}\hlstd{(diris,}\hlstr{'label'}\hlstd{)} +\end{alltt} +\begin{verbatim} +## [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 +## [36] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +## [71] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +## [106] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +## [141] 0 0 0 0 0 0 0 0 0 0 +\end{verbatim} +\end{kframe} +\end{knitrout} + +We can also save the matrix to a binary file. 
Then load it simply with +\verb@xgb.DMatrix@ +\begin{knitrout} +\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} +\begin{alltt} +\hlkwd{xgb.DMatrix.save}\hlstd{(diris,} \hlstr{'iris.xgb.DMatrix'}\hlstd{)} +\end{alltt} +\begin{verbatim} +## 150x4 matrix with 600 entries is saved to iris.xgb.DMatrix +## [1] TRUE +\end{verbatim} +\begin{alltt} +\hlstd{diris} \hlkwb{=} \hlkwd{xgb.DMatrix}\hlstd{(}\hlstr{'iris.xgb.DMatrix'}\hlstd{)} +\end{alltt} +\begin{verbatim} +## 150x4 matrix with 600 entries is loaded from iris.xgb.DMatrix +\end{verbatim} +\end{kframe} +\end{knitrout} + +\section{Advanced Examples} + +The function \verb@xgboost@ is a simple function with less parameter, in order +to be R-friendly. The core training function is wrapped in \verb@xgb.train@. It is more flexible than \verb@xgboost@, but it requires users to read the document a bit more carefully. + +\verb@xgb.train@ only accept a \verb@xgb.DMatrix@ object as its input, while it supports advanced features as custom objective and evaluation functions. + +\begin{knitrout} +\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} +\begin{alltt} +\hlstd{logregobj} \hlkwb{<-} \hlkwa{function}\hlstd{(}\hlkwc{preds}\hlstd{,} \hlkwc{dtrain}\hlstd{) \{} + \hlstd{labels} \hlkwb{<-} \hlkwd{getinfo}\hlstd{(dtrain,} \hlstr{"label"}\hlstd{)} + \hlstd{preds} \hlkwb{<-} \hlnum{1}\hlopt{/}\hlstd{(}\hlnum{1} \hlopt{+} \hlkwd{exp}\hlstd{(}\hlopt{-}\hlstd{preds))} + \hlstd{grad} \hlkwb{<-} \hlstd{preds} \hlopt{-} \hlstd{labels} + \hlstd{hess} \hlkwb{<-} \hlstd{preds} \hlopt{*} \hlstd{(}\hlnum{1} \hlopt{-} \hlstd{preds)} + \hlkwd{return}\hlstd{(}\hlkwd{list}\hlstd{(}\hlkwc{grad} \hlstd{= grad,} \hlkwc{hess} \hlstd{= hess))} +\hlstd{\}} + +\hlstd{evalerror} \hlkwb{<-} \hlkwa{function}\hlstd{(}\hlkwc{preds}\hlstd{,} \hlkwc{dtrain}\hlstd{) \{} + \hlstd{labels} \hlkwb{<-} \hlkwd{getinfo}\hlstd{(dtrain,} \hlstr{"label"}\hlstd{)} + \hlstd{err} \hlkwb{<-} \hlkwd{sqrt}\hlstd{(}\hlkwd{mean}\hlstd{((preds}\hlopt{-}\hlstd{labels)}\hlopt{^}\hlnum{2}\hlstd{))} + \hlkwd{return}\hlstd{(}\hlkwd{list}\hlstd{(}\hlkwc{metric} \hlstd{=} \hlstr{"MSE"}\hlstd{,} \hlkwc{value} \hlstd{= err))} +\hlstd{\}} + +\hlstd{dtest} \hlkwb{<-} \hlkwd{slice}\hlstd{(diris,}\hlnum{1}\hlopt{:}\hlnum{100}\hlstd{)} +\hlstd{watchlist} \hlkwb{<-} \hlkwd{list}\hlstd{(}\hlkwc{eval} \hlstd{= dtest,} \hlkwc{train} \hlstd{= diris)} +\hlstd{param} \hlkwb{<-} \hlkwd{list}\hlstd{(}\hlkwc{max_depth} \hlstd{=} \hlnum{2}\hlstd{,} \hlkwc{eta} \hlstd{=} \hlnum{1}\hlstd{,} \hlkwc{silent} \hlstd{=} \hlnum{1}\hlstd{)} + +\hlstd{bst} \hlkwb{<-} \hlkwd{xgb.train}\hlstd{(param, diris,} \hlkwc{nround} \hlstd{=} \hlnum{2}\hlstd{, watchlist, logregobj, evalerror)} +\end{alltt} +\begin{verbatim} +## [1] eval-MSE:1.601 train-MSE:1.76 +## [2] eval-MSE:2.567 train-MSE:2.745 +\end{verbatim} +\end{kframe} +\end{knitrout} + +The gradient and second order gradient is required for the output of customized +objective function. + +We also have \verb@slice@ for row extraction. It is useful in +cross-validation. + +For a walkthrough demo, please see \verb@R-package/inst/examples/demo.R@ for further +details. + +\section{The Higgs Boson competition} + +We have made a demo for \href{http://www.kaggle.com/c/higgs-boson}{the Higgs +Boson Machine Learning Challenge}. + +Here are the instructions to make a submission +\begin{enumerate} + \item Download the \href{http://www.kaggle.com/c/higgs-boson/data}{datasets} + and extract them to \verb@data/@. 
+ \item Run scripts under \verb@xgboost/demo/kaggle-higgs/@: + \href{https://github.com/tqchen/xgboost/blob/master/demo/kaggle-higgs/higgs-train.R}{higgs-train.R} + and \href{https://github.com/tqchen/xgboost/blob/master/demo/kaggle-higgs/higgs-pred.R}{higgs-pred.R}. + The computation will take less than a minute on Intel i7. + \item Go to the \href{http://www.kaggle.com/c/higgs-boson/submissions/attach}{submission page} + and submit your result. +\end{enumerate} + +We provide \href{https://github.com/tqchen/xgboost/blob/master/demo/kaggle-higgs/speedtest.R}{a script} +to compare the time cost on the higgs dataset with \verb@gbm@ and \verb@xgboost@. +The training set contains 350000 records and 30 features. + +\verb@xgboost@ can automatically do parallel computation. On a machine with Intel +i7-4700MQ and 24GB memories, we found that \verb@xgboost@ costs about 35 seconds, which is about 20 times faster +than \verb@gbm@. When we limited \verb@xgboost@ to use only one thread, it was +still about two times faster than \verb@gbm@. + +Meanwhile, the result from \verb@xgboost@ reaches +\href{http://www.kaggle.com/c/higgs-boson/details/evaluation}{3.60@AMS} with a +single model. This results stands in the +\href{http://www.kaggle.com/c/higgs-boson/leaderboard}{top 30\%} of the +competition. + +\bibliographystyle{jss} +\nocite{*} % list uncited references +\bibliography{xgboost} + +\end{document} + From de08c5a3da691dd2be10ee4d45c5457eeb31e740 Mon Sep 17 00:00:00 2001 From: hetong Date: Fri, 5 Sep 2014 19:49:25 -0700 Subject: [PATCH 3/5] remove temp files --- R-package/vignettes/xgboost.aux | 28 --- R-package/vignettes/xgboost.bbl | 24 --- R-package/vignettes/xgboost.blg | 47 ----- R-package/vignettes/xgboost.out | 4 - R-package/vignettes/xgboost.tex | 319 -------------------------------- 5 files changed, 422 deletions(-) delete mode 100644 R-package/vignettes/xgboost.aux delete mode 100644 R-package/vignettes/xgboost.bbl delete mode 100644 R-package/vignettes/xgboost.blg delete mode 100644 R-package/vignettes/xgboost.out delete mode 100644 R-package/vignettes/xgboost.tex diff --git a/R-package/vignettes/xgboost.aux b/R-package/vignettes/xgboost.aux deleted file mode 100644 index 6e6babc4c..000000000 --- a/R-package/vignettes/xgboost.aux +++ /dev/null @@ -1,28 +0,0 @@ -\relax -\providecommand\hyper@newdestlabel[2]{} -\providecommand\HyperFirstAtBeginDocument{\AtBeginDocument} -\HyperFirstAtBeginDocument{\ifx\hyper@anchor\@undefined -\global\let\oldcontentsline\contentsline -\gdef\contentsline#1#2#3#4{\oldcontentsline{#1}{#2}{#3}} -\global\let\oldnewlabel\newlabel -\gdef\newlabel#1#2{\newlabelxx{#1}#2} -\gdef\newlabelxx#1#2#3#4#5#6{\oldnewlabel{#1}{{#2}{#3}}} -\AtEndDocument{\ifx\hyper@anchor\@undefined -\let\contentsline\oldcontentsline -\let\newlabel\oldnewlabel -\fi} -\fi} -\global\let\hyper@last\relax -\gdef\HyperFirstAtBeginDocument#1{#1} -\providecommand\HyField@AuxAddToFields[1]{} -\providecommand\HyField@AuxAddToCoFields[2]{} -\citation{friedman2001greedy} -\@writefile{toc}{\contentsline {section}{\numberline {1}Introduction}{1}{section.1}} -\@writefile{toc}{\contentsline {section}{\numberline {2}Example with iris}{1}{section.2}} -\@writefile{toc}{\contentsline {section}{\numberline {3}Advanced Examples}{2}{section.3}} -\bibstyle{jss} -\citation{*} -\bibdata{xgboost} -\bibcite{friedman2000additive}{{1}{2000}{{Friedman \emph {et~al.}}}{{Friedman, Hastie, Tibshirani \emph {et~al.}}}} -\bibcite{friedman2001greedy}{{2}{2001}{{Friedman}}{{}}} -\@writefile{toc}{\contentsline {section}{\numberline 
{4}The Higgs Boson competition}{3}{section.4}} diff --git a/R-package/vignettes/xgboost.bbl b/R-package/vignettes/xgboost.bbl deleted file mode 100644 index fdf58e763..000000000 --- a/R-package/vignettes/xgboost.bbl +++ /dev/null @@ -1,24 +0,0 @@ -\begin{thebibliography}{2} -\newcommand{\enquote}[1]{``#1''} -\providecommand{\natexlab}[1]{#1} -\providecommand{\url}[1]{\texttt{#1}} -\providecommand{\urlprefix}{URL } -\expandafter\ifx\csname urlstyle\endcsname\relax - \providecommand{\doi}[1]{doi:\discretionary{}{}{}#1}\else - \providecommand{\doi}{doi:\discretionary{}{}{}\begingroup - \urlstyle{rm}\Url}\fi -\providecommand{\eprint}[2][]{\url{#2}} - -\bibitem[{Friedman \emph{et~al.}(2000)Friedman, Hastie, Tibshirani - \emph{et~al.}}]{friedman2000additive} -Friedman J, Hastie T, Tibshirani R, \emph{et~al.} (2000). -\newblock \enquote{Additive logistic regression: a statistical view of boosting - (with discussion and a rejoinder by the authors).} -\newblock \emph{The annals of statistics}, \textbf{28}(2), 337--407. - -\bibitem[{Friedman(2001)}]{friedman2001greedy} -Friedman JH (2001). -\newblock \enquote{Greedy function approximation: a gradient boosting machine.} -\newblock \emph{Annals of Statistics}, pp. 1189--1232. - -\end{thebibliography} diff --git a/R-package/vignettes/xgboost.blg b/R-package/vignettes/xgboost.blg deleted file mode 100644 index 2c0e87387..000000000 --- a/R-package/vignettes/xgboost.blg +++ /dev/null @@ -1,47 +0,0 @@ -This is BibTeX, Version 0.99d (TeX Live 2013/Debian) -Capacity: max_strings=35307, hash_size=35307, hash_prime=30011 -The top-level auxiliary file: xgboost.aux -The style file: jss.bst -Database file #1: xgboost.bib -Reallocated wiz_functions (elt_size=4) to 6000 items from 3000. -You've used 2 entries, - 3140 wiz_defined-function locations, - 641 strings with 5430 characters, -and the built_in function-call counts, 1920 in all, are: -= -- 162 -> -- 44 -< -- 2 -+ -- 17 -- -- 15 -* -- 149 -:= -- 256 -add.period$ -- 8 -call.type$ -- 2 -change.case$ -- 12 -chr.to.int$ -- 2 -cite$ -- 2 -duplicate$ -- 171 -empty$ -- 175 -format.name$ -- 19 -if$ -- 395 -int.to.chr$ -- 1 -int.to.str$ -- 1 -missing$ -- 24 -newline$ -- 21 -num.names$ -- 8 -pop$ -- 51 -preamble$ -- 1 -purify$ -- 12 -quote$ -- 0 -skip$ -- 53 -stack$ -- 0 -substring$ -- 181 -swap$ -- 65 -text.length$ -- 1 -text.prefix$ -- 0 -top$ -- 0 -type$ -- 18 -warning$ -- 0 -while$ -- 16 -width$ -- 0 -write$ -- 36 diff --git a/R-package/vignettes/xgboost.out b/R-package/vignettes/xgboost.out deleted file mode 100644 index 6d60796a3..000000000 --- a/R-package/vignettes/xgboost.out +++ /dev/null @@ -1,4 +0,0 @@ -\BOOKMARK [1][-]{section.1}{Introduction}{}% 1 -\BOOKMARK [1][-]{section.2}{Example with iris}{}% 2 -\BOOKMARK [1][-]{section.3}{Advanced Examples}{}% 3 -\BOOKMARK [1][-]{section.4}{The Higgs Boson competition}{}% 4 diff --git a/R-package/vignettes/xgboost.tex b/R-package/vignettes/xgboost.tex deleted file mode 100644 index 0ed4015b7..000000000 --- a/R-package/vignettes/xgboost.tex +++ /dev/null @@ -1,319 +0,0 @@ -\documentclass{article}\usepackage[]{graphicx}\usepackage[]{color} -%% maxwidth is the original width if it is less than linewidth -%% otherwise use linewidth (to make sure the graphics do not exceed the margin) -\makeatletter -\def\maxwidth{ % - \ifdim\Gin@nat@width>\linewidth - \linewidth - \else - \Gin@nat@width - \fi -} -\makeatother - -\definecolor{fgcolor}{rgb}{0.345, 0.345, 0.345} -\newcommand{\hlnum}[1]{\textcolor[rgb]{0.686,0.059,0.569}{#1}}% 
-\newcommand{\hlstr}[1]{\textcolor[rgb]{0.192,0.494,0.8}{#1}}% -\newcommand{\hlcom}[1]{\textcolor[rgb]{0.678,0.584,0.686}{\textit{#1}}}% -\newcommand{\hlopt}[1]{\textcolor[rgb]{0,0,0}{#1}}% -\newcommand{\hlstd}[1]{\textcolor[rgb]{0.345,0.345,0.345}{#1}}% -\newcommand{\hlkwa}[1]{\textcolor[rgb]{0.161,0.373,0.58}{\textbf{#1}}}% -\newcommand{\hlkwb}[1]{\textcolor[rgb]{0.69,0.353,0.396}{#1}}% -\newcommand{\hlkwc}[1]{\textcolor[rgb]{0.333,0.667,0.333}{#1}}% -\newcommand{\hlkwd}[1]{\textcolor[rgb]{0.737,0.353,0.396}{\textbf{#1}}}% - -\usepackage{framed} -\makeatletter -\newenvironment{kframe}{% - \def\at@end@of@kframe{}% - \ifinner\ifhmode% - \def\at@end@of@kframe{\end{minipage}}% - \begin{minipage}{\columnwidth}% - \fi\fi% - \def\FrameCommand##1{\hskip\@totalleftmargin \hskip-\fboxsep - \colorbox{shadecolor}{##1}\hskip-\fboxsep - % There is no \\@totalrightmargin, so: - \hskip-\linewidth \hskip-\@totalleftmargin \hskip\columnwidth}% - \MakeFramed {\advance\hsize-\width - \@totalleftmargin\z@ \linewidth\hsize - \@setminipage}}% - {\par\unskip\endMakeFramed% - \at@end@of@kframe} -\makeatother - -\definecolor{shadecolor}{rgb}{.97, .97, .97} -\definecolor{messagecolor}{rgb}{0, 0, 0} -\definecolor{warningcolor}{rgb}{1, 0, 1} -\definecolor{errorcolor}{rgb}{1, 0, 0} -\newenvironment{knitrout}{}{} % an empty environment to be redefined in TeX - -\usepackage{alltt} -\RequirePackage{url} -\usepackage{hyperref} -\RequirePackage{amsmath} -\RequirePackage{natbib} -\RequirePackage[a4paper,lmargin={1.25in},rmargin={1.25in},tmargin={1in},bmargin={1in}]{geometry} - -\makeatletter -% \VignetteIndexEntry{xgboost: eXtreme Gradient Boosting} -%\VignetteKeywords{xgboost, gbm, gradient boosting machines} -%\VignettePackage{xgboost} -% \VignetteEngine{knitr::knitr} -\makeatother -\IfFileExists{upquote.sty}{\usepackage{upquote}}{} -\begin{document} -%\SweaveOpts{concordance=TRUE} - - - -% - -% - - \begin{center} - \vspace*{6\baselineskip} - \rule{\textwidth}{1.6pt}\vspace*{-\baselineskip}\vspace*{2pt} - \rule{\textwidth}{0.4pt}\\[2\baselineskip] - {\LARGE \textbf{xgboost: eXtreme Gradient Boosting}}\\[1.2\baselineskip] - \rule{\textwidth}{0.4pt}\vspace*{-\baselineskip}\vspace{3.2pt} - \rule{\textwidth}{1.6pt}\\[2\baselineskip] - {\Large Tianqi Chen, Tong He}\\[\baselineskip] - {\large Package Version: 0.3-0}\\[\baselineskip] - {\large \today}\par - \vfill - \end{center} - -\thispagestyle{empty} - -\clearpage - -\setcounter{page}{1} - -\section{Introduction} - -This is an introductory document of using the \verb@xgboost@ package in R. - -\verb@xgboost@ is short for eXtreme Gradient Boosting package. It is an efficient - and scalable implementation of gradient boosting framework by \citep{friedman2001greedy}. -The package includes efficient linear model solver and tree learning algorithm. -It supports various objective functions, including regression, classification -and ranking. The package is made to be extendible, so that users are also allowed to define their own objectives easily. It has several features: -\begin{enumerate} - \item{Speed: }{\verb@xgboost@ can automatically do parallel computation on - Windows and Linux, with openmp. It is generally over 10 times faster than - \verb@gbm@.} - \item{Input Type: }{\verb@xgboost@ takes several types of input data:} - \begin{itemize} - \item{Dense Matrix: }{R's dense matrix, i.e. \verb@matrix@} - \item{Sparse Matrix: }{R's sparse matrix \verb@Matrix::dgCMatrix@} - \item{Data File: }{Local data files} - \item{xgb.DMatrix: }{\verb@xgboost@'s own class. 
Recommended.} - \end{itemize} - \item{Sparsity: }{\verb@xgboost@ accepts sparse input for both tree booster - and linear booster, and is optimized for sparse input.} - \item{Customization: }{\verb@xgboost@ supports customized objective function - and evaluation function} - \item{Performance: }{\verb@xgboost@ has better performance on several different - datasets.} -\end{enumerate} - - -\section{Example with iris} - -In this section, we will illustrate some common usage of \verb@xgboost@. - -\begin{knitrout} -\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} -\begin{alltt} -\hlkwd{library}\hlstd{(xgboost)} -\hlkwd{data}\hlstd{(iris)} -\hlstd{bst} \hlkwb{<-} \hlkwd{xgboost}\hlstd{(}\hlkwd{as.matrix}\hlstd{(iris[,}\hlnum{1}\hlopt{:}\hlnum{4}\hlstd{]),}\hlkwd{as.numeric}\hlstd{(iris[,}\hlnum{5}\hlstd{]}\hlopt{==}\hlstr{'setosa'}\hlstd{),} - \hlkwc{nrounds} \hlstd{=} \hlnum{5}\hlstd{)} -\end{alltt} -\begin{verbatim} -## [0] train-rmse:0.351971 -## [1] train-rmse:0.247769 -## [2] train-rmse:0.174418 -## [3] train-rmse:0.122783 -## [4] train-rmse:0.086435 -\end{verbatim} -\begin{alltt} -\hlkwd{xgb.save}\hlstd{(bst,} \hlstr{'model.save'}\hlstd{)} -\end{alltt} -\begin{verbatim} -## [1] TRUE -\end{verbatim} -\begin{alltt} -\hlstd{bst} \hlkwb{=} \hlkwd{xgb.load}\hlstd{(}\hlstr{'model.save'}\hlstd{)} -\hlstd{pred} \hlkwb{<-} \hlkwd{predict}\hlstd{(bst,} \hlkwd{as.matrix}\hlstd{(iris[,}\hlnum{1}\hlopt{:}\hlnum{4}\hlstd{]))} -\end{alltt} -\end{kframe} -\end{knitrout} - -\verb@xgboost@ is the main function to train a \verb@Booster@, i.e. a model. -\verb@predict@ does prediction on the model. - -Here we can save the model to a binary local file, and load it when needed. -We can't inspect the trees inside. However we have another function to save the -model in plain text. -\begin{knitrout} -\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} -\begin{alltt} -\hlkwd{xgb.dump}\hlstd{(bst,} \hlstr{'model.dump'}\hlstd{)} -\end{alltt} -\begin{verbatim} -## [1] TRUE -\end{verbatim} -\end{kframe} -\end{knitrout} - -The output looks like - -\begin{verbatim} -booster[0]: -0:[f2<2.45] yes=1,no=2,missing=1 - 1:leaf=0.147059 - 2:[f3<1.65] yes=3,no=4,missing=3 - 3:leaf=0.464151 - 4:leaf=0.722449 -booster[1]: -0:[f2<2.45] yes=1,no=2,missing=1 - 1:leaf=0.103806 - 2:[f2<4.85] yes=3,no=4,missing=3 - 3:leaf=0.316341 - 4:leaf=0.510365 -\end{verbatim} - -It is important to know \verb@xgboost@'s own data type: \verb@xgb.DMatrix@. -It speeds up \verb@xgboost@, and is needed for advanced features such as -training from initial prediction value, weighted training instance. 
- -We can use \verb@xgb.DMatrix@ to construct an \verb@xgb.DMatrix@ object: -\begin{knitrout} -\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} -\begin{alltt} -\hlstd{iris.mat} \hlkwb{<-} \hlkwd{as.matrix}\hlstd{(iris[,}\hlnum{1}\hlopt{:}\hlnum{4}\hlstd{])} -\hlstd{iris.label} \hlkwb{<-} \hlkwd{as.numeric}\hlstd{(iris[,}\hlnum{5}\hlstd{]}\hlopt{==}\hlstr{'setosa'}\hlstd{)} -\hlstd{diris} \hlkwb{<-} \hlkwd{xgb.DMatrix}\hlstd{(iris.mat,} \hlkwc{label} \hlstd{= iris.label)} -\hlkwd{class}\hlstd{(diris)} -\end{alltt} -\begin{verbatim} -## [1] "xgb.DMatrix" -\end{verbatim} -\begin{alltt} -\hlkwd{getinfo}\hlstd{(diris,}\hlstr{'label'}\hlstd{)} -\end{alltt} -\begin{verbatim} -## [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 -## [36] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -## [71] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -## [106] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -## [141] 0 0 0 0 0 0 0 0 0 0 -\end{verbatim} -\end{kframe} -\end{knitrout} - -We can also save the matrix to a binary file. Then load it simply with -\verb@xgb.DMatrix@ -\begin{knitrout} -\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} -\begin{alltt} -\hlkwd{xgb.DMatrix.save}\hlstd{(diris,} \hlstr{'iris.xgb.DMatrix'}\hlstd{)} -\end{alltt} -\begin{verbatim} -## 150x4 matrix with 600 entries is saved to iris.xgb.DMatrix -## [1] TRUE -\end{verbatim} -\begin{alltt} -\hlstd{diris} \hlkwb{=} \hlkwd{xgb.DMatrix}\hlstd{(}\hlstr{'iris.xgb.DMatrix'}\hlstd{)} -\end{alltt} -\begin{verbatim} -## 150x4 matrix with 600 entries is loaded from iris.xgb.DMatrix -\end{verbatim} -\end{kframe} -\end{knitrout} - -\section{Advanced Examples} - -The function \verb@xgboost@ is a simple function with less parameter, in order -to be R-friendly. The core training function is wrapped in \verb@xgb.train@. It is more flexible than \verb@xgboost@, but it requires users to read the document a bit more carefully. - -\verb@xgb.train@ only accept a \verb@xgb.DMatrix@ object as its input, while it supports advanced features as custom objective and evaluation functions. 
- -\begin{knitrout} -\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} -\begin{alltt} -\hlstd{logregobj} \hlkwb{<-} \hlkwa{function}\hlstd{(}\hlkwc{preds}\hlstd{,} \hlkwc{dtrain}\hlstd{) \{} - \hlstd{labels} \hlkwb{<-} \hlkwd{getinfo}\hlstd{(dtrain,} \hlstr{"label"}\hlstd{)} - \hlstd{preds} \hlkwb{<-} \hlnum{1}\hlopt{/}\hlstd{(}\hlnum{1} \hlopt{+} \hlkwd{exp}\hlstd{(}\hlopt{-}\hlstd{preds))} - \hlstd{grad} \hlkwb{<-} \hlstd{preds} \hlopt{-} \hlstd{labels} - \hlstd{hess} \hlkwb{<-} \hlstd{preds} \hlopt{*} \hlstd{(}\hlnum{1} \hlopt{-} \hlstd{preds)} - \hlkwd{return}\hlstd{(}\hlkwd{list}\hlstd{(}\hlkwc{grad} \hlstd{= grad,} \hlkwc{hess} \hlstd{= hess))} -\hlstd{\}} - -\hlstd{evalerror} \hlkwb{<-} \hlkwa{function}\hlstd{(}\hlkwc{preds}\hlstd{,} \hlkwc{dtrain}\hlstd{) \{} - \hlstd{labels} \hlkwb{<-} \hlkwd{getinfo}\hlstd{(dtrain,} \hlstr{"label"}\hlstd{)} - \hlstd{err} \hlkwb{<-} \hlkwd{sqrt}\hlstd{(}\hlkwd{mean}\hlstd{((preds}\hlopt{-}\hlstd{labels)}\hlopt{^}\hlnum{2}\hlstd{))} - \hlkwd{return}\hlstd{(}\hlkwd{list}\hlstd{(}\hlkwc{metric} \hlstd{=} \hlstr{"MSE"}\hlstd{,} \hlkwc{value} \hlstd{= err))} -\hlstd{\}} - -\hlstd{dtest} \hlkwb{<-} \hlkwd{slice}\hlstd{(diris,}\hlnum{1}\hlopt{:}\hlnum{100}\hlstd{)} -\hlstd{watchlist} \hlkwb{<-} \hlkwd{list}\hlstd{(}\hlkwc{eval} \hlstd{= dtest,} \hlkwc{train} \hlstd{= diris)} -\hlstd{param} \hlkwb{<-} \hlkwd{list}\hlstd{(}\hlkwc{max_depth} \hlstd{=} \hlnum{2}\hlstd{,} \hlkwc{eta} \hlstd{=} \hlnum{1}\hlstd{,} \hlkwc{silent} \hlstd{=} \hlnum{1}\hlstd{)} - -\hlstd{bst} \hlkwb{<-} \hlkwd{xgb.train}\hlstd{(param, diris,} \hlkwc{nround} \hlstd{=} \hlnum{2}\hlstd{, watchlist, logregobj, evalerror)} -\end{alltt} -\begin{verbatim} -## [1] eval-MSE:1.601 train-MSE:1.76 -## [2] eval-MSE:2.567 train-MSE:2.745 -\end{verbatim} -\end{kframe} -\end{knitrout} - -The gradient and second order gradient is required for the output of customized -objective function. - -We also have \verb@slice@ for row extraction. It is useful in -cross-validation. - -For a walkthrough demo, please see \verb@R-package/inst/examples/demo.R@ for further -details. - -\section{The Higgs Boson competition} - -We have made a demo for \href{http://www.kaggle.com/c/higgs-boson}{the Higgs -Boson Machine Learning Challenge}. - -Here are the instructions to make a submission -\begin{enumerate} - \item Download the \href{http://www.kaggle.com/c/higgs-boson/data}{datasets} - and extract them to \verb@data/@. - \item Run scripts under \verb@xgboost/demo/kaggle-higgs/@: - \href{https://github.com/tqchen/xgboost/blob/master/demo/kaggle-higgs/higgs-train.R}{higgs-train.R} - and \href{https://github.com/tqchen/xgboost/blob/master/demo/kaggle-higgs/higgs-pred.R}{higgs-pred.R}. - The computation will take less than a minute on Intel i7. - \item Go to the \href{http://www.kaggle.com/c/higgs-boson/submissions/attach}{submission page} - and submit your result. -\end{enumerate} - -We provide \href{https://github.com/tqchen/xgboost/blob/master/demo/kaggle-higgs/speedtest.R}{a script} -to compare the time cost on the higgs dataset with \verb@gbm@ and \verb@xgboost@. -The training set contains 350000 records and 30 features. - -\verb@xgboost@ can automatically do parallel computation. On a machine with Intel -i7-4700MQ and 24GB memories, we found that \verb@xgboost@ costs about 35 seconds, which is about 20 times faster -than \verb@gbm@. When we limited \verb@xgboost@ to use only one thread, it was -still about two times faster than \verb@gbm@. 
-
-Meanwhile, the result from \verb@xgboost@ reaches
-\href{http://www.kaggle.com/c/higgs-boson/details/evaluation}{3.60@AMS} with a
-single model. This results stands in the
-\href{http://www.kaggle.com/c/higgs-boson/leaderboard}{top 30\%} of the
-competition.
-
-\bibliographystyle{jss}
-\nocite{*} % list uncited references
-\bibliography{xgboost}
-
-\end{document}
-

From 63dd037db6224eb19e803660e6ef7cbbc8e5ff5d Mon Sep 17 00:00:00 2001
From: hetong
Date: Fri, 5 Sep 2014 20:25:38 -0700
Subject: [PATCH 4/5] add r basic walkthrough

---
 demo/guide-R/basic_walkthrough.R | 57 +++++++++++++++++++++++++++++++++
 1 file changed, 57 insertions(+)
 create mode 100644 demo/guide-R/basic_walkthrough.R

diff --git a/demo/guide-R/basic_walkthrough.R b/demo/guide-R/basic_walkthrough.R
new file mode 100644
index 000000000..959e5f0ed
--- /dev/null
+++ b/demo/guide-R/basic_walkthrough.R
@@ -0,0 +1,57 @@
+require(xgboost)
+# Matrix is needed for the sparse dgCMatrix coercion below
+require(Matrix)
+
+dtrain <- xgb.DMatrix('../data/agaricus.txt.train')
+dtest <- xgb.DMatrix('../data/agaricus.txt.test')
+param <- list(max_depth=2,eta=1,silent=1,objective='binary:logistic')
+watchlist <- list(eval = dtest, train = dtrain)
+num_round <- 2
+bst <- xgb.train(param, dtrain, num_round, watchlist)
+preds <- predict(bst, dtest)
+labels <- getinfo(dtest,'label')
+cat('error=', mean(as.numeric(preds>0.5)!=labels),'\n')
+xgb.save(bst, 'xgb.model')
+xgb.dump(bst, 'dump.raw.txt')
+xgb.dump(bst, 'dump.nice.txt','../data/featmap.txt')
+
+bst2 <- xgb.load('xgb.model')
+preds2 <- predict(bst2,dtest)
+# the reloaded model must give identical predictions
+stopifnot(sum((preds-preds2)^2)==0)
+
+
+cat('start running example of building a DMatrix from a sparse dgCMatrix\n')
+# minimal libsvm reader: returns the label vector and a sparse feature matrix
+read.libsvm <- function(fname, maxcol) {
+  content <- readLines(fname)
+  nline <- length(content)
+  label <- numeric(nline)
+  mat <- matrix(0, nline, maxcol + 1)
+  for (i in 1:nline) {
+    arr <- as.vector(strsplit(content[i], " ")[[1]])
+    label[i] <- as.numeric(arr[[1]])
+    for (j in 2:length(arr)) {
+      kv <- strsplit(arr[j], ":")[[1]]
+      # shift by one to avoid the 0 index
+      findex <- as.integer(kv[1]) + 1
+      fvalue <- as.numeric(kv[2])
+      mat[i, findex] <- fvalue
+    }
+  }
+  mat <- as(mat, "sparseMatrix")
+  return(list(label = label, data = mat))
+}
+csc <- read.libsvm("../data/agaricus.txt.train", 126)
+y <- csc$label
+x <- csc$data
+class(x)
+dtrain <- xgb.DMatrix(x, label = y)
+bst <- xgb.train(param, dtrain, num_round, watchlist)
+
+cat('start running example of building a DMatrix from a dense matrix\n')
+x <- as.matrix(x)
+class(x)
+dtrain <- xgb.DMatrix(x, label = y)
+bst <- xgb.train(param, dtrain, num_round, watchlist)
+

From af07f5135a56a985c0f16fadb667de93d0873d29 Mon Sep 17 00:00:00 2001
From: hetong
Date: Fri, 5 Sep 2014 20:33:39 -0700
Subject: [PATCH 5/5] cleaning