From fbf2707561e6184cbf1e1b1d65dc0b30638edcf1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C3=ABl=20Benesty?=
Date: Tue, 8 Dec 2015 18:18:51 +0100
Subject: [PATCH] Wording improvement

---
 R-package/R/xgb.create.features.R    |  2 +-
 R-package/R/xgb.importance.R         | 13 ++++---------
 R-package/R/xgb.plot.deepness.R      |  3 ++-
 R-package/man/xgb.create.features.Rd |  2 +-
 R-package/man/xgb.importance.Rd      | 11 ++++-------
 R-package/man/xgb.plot.deepness.Rd   |  3 ++-
 6 files changed, 14 insertions(+), 20 deletions(-)

diff --git a/R-package/R/xgb.create.features.R b/R-package/R/xgb.create.features.R
index bde791fcf..bd913a81c 100644
--- a/R-package/R/xgb.create.features.R
+++ b/R-package/R/xgb.create.features.R
@@ -14,7 +14,7 @@
 #' @details
 #' This is the function inspired from the paragraph 3.1 of the paper:
 #'
-#' \strong{"Practical Lessons from Predicting Clicks on Ads at Facebook"}
+#' \strong{Practical Lessons from Predicting Clicks on Ads at Facebook}
 #'
 #' \emph{(Xinran He, Junfeng Pan, Ou Jin, Tianbing Xu, Bo Liu, Tao Xu, Yan, xin Shi, Antoine Atallah, Ralf Herbrich, Stuart Bowers,
 #' Joaquin Quiñonero Candela)}
diff --git a/R-package/R/xgb.importance.R b/R-package/R/xgb.importance.R
index 2cd0788cf..722427fcb 100644
--- a/R-package/R/xgb.importance.R
+++ b/R-package/R/xgb.importance.R
@@ -21,7 +21,7 @@
 #' @details
 #' This is the function to understand the model trained (and through your model, your data).
 #'
-#' Results are returned for both linear and tree models.
+#' This function works for both linear and tree models.
 #'
 #' \code{data.table} is returned by the function.
 #' The columns are :
@@ -32,8 +32,9 @@
 #' \item \code{Weight} percentage representing the relative number of times a feature have been taken into trees.
 #' }
 #'
-#' If you don't provide name, index of the features are used.
-#' They are extracted from the boost dump (made on the C++ side), the index starts at 0 (usual in C++) instead of 1 (usual in R).
+#' If you don't provide \code{feature_names}, the index of the features will be used instead.
+#'
+#' Because the index is extracted from the model dump (made on the C++ side), it starts at 0 (as usual in C++) instead of 1 (as usual in R).
 #'
 #' Co-occurence count
 #' ------------------
@@ -47,10 +48,6 @@
 #' @examples
 #' data(agaricus.train, package='xgboost')
 #'
-#' # Both dataset are list with two items, a sparse matrix and labels
-#' # (labels = outcome column which will be learned).
-#' # Each column of the sparse Matrix is a feature in one hot encoding format.
-#'
 #' bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max.depth = 2,
 #'                eta = 1, nthread = 2, nround = 2,objective = "binary:logistic")
 #'
@@ -114,8 +111,6 @@ xgb.importance <- function(feature_names = NULL, model = NULL, data = NULL, labe
   result
 }
 
-
-
 # Avoid error messages during CRAN check.
 # The reason is that these variables are never declared
 # They are mainly column names inferred by Data.table...
diff --git a/R-package/R/xgb.plot.deepness.R b/R-package/R/xgb.plot.deepness.R
index b6c05f727..2a20532f6 100644
--- a/R-package/R/xgb.plot.deepness.R
+++ b/R-package/R/xgb.plot.deepness.R
@@ -76,6 +76,7 @@ get.paths.to.leaf <- function(dt.tree) {
 #' @details
 #' Display both the number of \code{leaf} and the distribution of \code{weighted observations}
 #' by tree deepness level.
+#'
 #' The purpose of this function is to help the user to find the best trade-off to set
 #' the \code{max.depth} and \code{min_child_weight} parameters according to the bias / variance trade-off.
 #'
@@ -88,7 +89,7 @@ get.paths.to.leaf <- function(dt.tree) {
 #' \item Weighted cover: noramlized weighted cover per Leaf (weighted number of instances).
 #' }
 #'
-#' This function is inspired by this blog post \url{http://aysent.github.io/2015/11/08/random-forest-leaf-visualization.html}
+#' This function is inspired by the blog post \url{http://aysent.github.io/2015/11/08/random-forest-leaf-visualization.html}
 #'
 #' @examples
 #' data(agaricus.train, package='xgboost')
diff --git a/R-package/man/xgb.create.features.Rd b/R-package/man/xgb.create.features.Rd
index 1e75cab8d..cab2ab654 100644
--- a/R-package/man/xgb.create.features.Rd
+++ b/R-package/man/xgb.create.features.Rd
@@ -20,7 +20,7 @@ May improve the learning by adding new features to the training data based on th
 \details{
 This is the function inspired from the paragraph 3.1 of the paper:
 
-\strong{"Practical Lessons from Predicting Clicks on Ads at Facebook"}
+\strong{Practical Lessons from Predicting Clicks on Ads at Facebook}
 
 \emph{(Xinran He, Junfeng Pan, Ou Jin, Tianbing Xu, Bo Liu, Tao Xu, Yan, xin Shi, Antoine Atallah, Ralf Herbrich, Stuart Bowers,
 Joaquin Quiñonero Candela)}
diff --git a/R-package/man/xgb.importance.Rd b/R-package/man/xgb.importance.Rd
index 1f845a1f9..4157d2181 100644
--- a/R-package/man/xgb.importance.Rd
+++ b/R-package/man/xgb.importance.Rd
@@ -27,7 +27,7 @@ Create a \code{data.table} of the most important features of a model.
 \details{
 This is the function to understand the model trained (and through your model, your data).
 
-Results are returned for both linear and tree models.
+This function works for both linear and tree models.
 
 \code{data.table} is returned by the function.
 The columns are :
@@ -38,8 +38,9 @@
 \item \code{Weight} percentage representing the relative number of times a feature have been taken into trees.
 }
 
-If you don't provide name, index of the features are used.
-They are extracted from the boost dump (made on the C++ side), the index starts at 0 (usual in C++) instead of 1 (usual in R).
+If you don't provide \code{feature_names}, the index of the features will be used instead.
+
+Because the index is extracted from the model dump (made on the C++ side), it starts at 0 (as usual in C++) instead of 1 (as usual in R).
 
 Co-occurence count
 ------------------
@@ -53,10 +54,6 @@ If you need to remember one thing only: until you want to leave us early, don't
 \examples{
 data(agaricus.train, package='xgboost')
 
-# Both dataset are list with two items, a sparse matrix and labels
-# (labels = outcome column which will be learned).
-# Each column of the sparse Matrix is a feature in one hot encoding format.
-
 bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max.depth = 2,
                eta = 1, nthread = 2, nround = 2,objective = "binary:logistic")
 
diff --git a/R-package/man/xgb.plot.deepness.Rd b/R-package/man/xgb.plot.deepness.Rd
index 6488514dd..c8ed130e2 100644
--- a/R-package/man/xgb.plot.deepness.Rd
+++ b/R-package/man/xgb.plot.deepness.Rd
@@ -18,6 +18,7 @@ Generate a graph to plot the distribution of deepness among trees.
 \details{
 Display both the number of \code{leaf} and the distribution of \code{weighted observations}
 by tree deepness level.
+
 The purpose of this function is to help the user to find the best trade-off to set
 the \code{max.depth} and \code{min_child_weight} parameters according to the bias / variance trade-off.
 
@@ -30,7 +31,7 @@ The graph is made of two parts:
 \item Weighted cover: noramlized weighted cover per Leaf (weighted number of instances).
 }
 
-This function is inspired by this blog post \url{http://aysent.github.io/2015/11/08/random-forest-leaf-visualization.html}
+This function is inspired by the blog post \url{http://aysent.github.io/2015/11/08/random-forest-leaf-visualization.html}
 }
 \examples{
 data(agaricus.train, package='xgboost')
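
Note (not part of the patch): a minimal R sketch of the behaviour the reworded xgb.importance documentation describes. It relies only on what the patch itself shows: the xgb.importance(feature_names, model, ...) signature visible in the hunk header and the xgboost() call from the \examples block. The colnames() lookup is an illustrative assumption about how callers typically supply feature names.

library(xgboost)
data(agaricus.train, package = 'xgboost')

bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max.depth = 2,
               eta = 1, nthread = 2, nround = 2, objective = "binary:logistic")

# With feature_names: the Feature column contains the column names of the sparse matrix.
xgb.importance(feature_names = colnames(agaricus.train$data), model = bst)

# Without feature_names: the Feature column contains 0-based feature indices,
# as extracted from the model dump made on the C++ side.
xgb.importance(model = bst)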