From fbf2707561e6184cbf1e1b1d65dc0b30638edcf1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C3=ABl=20Benesty?=
Date: Tue, 8 Dec 2015 18:18:51 +0100
Subject: [PATCH] Wording improvement

---
 R-package/R/xgb.create.features.R    |  2 +-
 R-package/R/xgb.importance.R         | 13 ++++---------
 R-package/R/xgb.plot.deepness.R      |  3 ++-
 R-package/man/xgb.create.features.Rd |  2 +-
 R-package/man/xgb.importance.Rd      | 11 ++++-------
 R-package/man/xgb.plot.deepness.Rd   |  3 ++-
 6 files changed, 14 insertions(+), 20 deletions(-)

diff --git a/R-package/R/xgb.create.features.R b/R-package/R/xgb.create.features.R
index bde791fcf..bd913a81c 100644
--- a/R-package/R/xgb.create.features.R
+++ b/R-package/R/xgb.create.features.R
@@ -14,7 +14,7 @@
 #' @details
 #' This is the function inspired from the paragraph 3.1 of the paper:
 #'
-#' \strong{"Practical Lessons from Predicting Clicks on Ads at Facebook"}
+#' \strong{Practical Lessons from Predicting Clicks on Ads at Facebook}
 #'
 #' \emph{(Xinran He, Junfeng Pan, Ou Jin, Tianbing Xu, Bo Liu, Tao Xu, Yan, xin Shi, Antoine Atallah, Ralf Herbrich, Stuart Bowers,
 #' Joaquin Quiñonero Candela)}
diff --git a/R-package/R/xgb.importance.R b/R-package/R/xgb.importance.R
index 2cd0788cf..722427fcb 100644
--- a/R-package/R/xgb.importance.R
+++ b/R-package/R/xgb.importance.R
@@ -21,7 +21,7 @@
 #' @details
 #' This is the function to understand the model trained (and through your model, your data).
 #'
-#' Results are returned for both linear and tree models.
+#' This function works for both linear and tree models.
 #'
 #' \code{data.table} is returned by the function.
 #' The columns are :
@@ -32,8 +32,9 @@
 #' \item \code{Weight} percentage representing the relative number of times a feature have been taken into trees.
 #' }
 #'
-#' If you don't provide name, index of the features are used.
-#' They are extracted from the boost dump (made on the C++ side), the index starts at 0 (usual in C++) instead of 1 (usual in R).
+#' If you don't provide \code{feature_names}, the index of the features will be used instead.
+#'
+#' Because the index is extracted from the model dump (made on the C++ side), it starts at 0 (as usual in C++) instead of 1 (as usual in R).
 #'
 #' Co-occurence count
 #' ------------------
@@ -47,10 +48,6 @@
 #' @examples
 #' data(agaricus.train, package='xgboost')
 #'
-#' # Both dataset are list with two items, a sparse matrix and labels
-#' # (labels = outcome column which will be learned).
-#' # Each column of the sparse Matrix is a feature in one hot encoding format.
-#'
 #' bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max.depth = 2,
 #'                eta = 1, nthread = 2, nround = 2,objective = "binary:logistic")
 #'
@@ -114,8 +111,6 @@ xgb.importance <- function(feature_names = NULL, model = NULL, data = NULL, labe
   result
 }
 
-
-
 # Avoid error messages during CRAN check.
 # The reason is that these variables are never declared
 # They are mainly column names inferred by Data.table...
diff --git a/R-package/R/xgb.plot.deepness.R b/R-package/R/xgb.plot.deepness.R
index b6c05f727..2a20532f6 100644
--- a/R-package/R/xgb.plot.deepness.R
+++ b/R-package/R/xgb.plot.deepness.R
@@ -76,6 +76,7 @@ get.paths.to.leaf <- function(dt.tree) {
 #' @details
 #' Display both the number of \code{leaf} and the distribution of \code{weighted observations}
 #' by tree deepness level.
+#'
 #' The purpose of this function is to help the user to find the best trade-off to set
 #' the \code{max.depth} and \code{min_child_weight} parameters according to the bias / variance trade-off.
 #'
@@ -88,7 +89,7 @@ get.paths.to.leaf <- function(dt.tree) {
 #' \item Weighted cover: noramlized weighted cover per Leaf (weighted number of instances).
 #' }
 #'
-#' This function is inspired by this blog post \url{http://aysent.github.io/2015/11/08/random-forest-leaf-visualization.html}
+#' This function is inspired by the blog post \url{http://aysent.github.io/2015/11/08/random-forest-leaf-visualization.html}
 #'
 #' @examples
 #' data(agaricus.train, package='xgboost')
diff --git a/R-package/man/xgb.create.features.Rd b/R-package/man/xgb.create.features.Rd
index 1e75cab8d..cab2ab654 100644
--- a/R-package/man/xgb.create.features.Rd
+++ b/R-package/man/xgb.create.features.Rd
@@ -20,7 +20,7 @@ May improve the learning by adding new features to the training data based on th
 \details{
 This is the function inspired from the paragraph 3.1 of the paper:
 
-\strong{"Practical Lessons from Predicting Clicks on Ads at Facebook"}
+\strong{Practical Lessons from Predicting Clicks on Ads at Facebook}
 
 \emph{(Xinran He, Junfeng Pan, Ou Jin, Tianbing Xu, Bo Liu, Tao Xu, Yan, xin Shi, Antoine Atallah, Ralf Herbrich, Stuart Bowers,
 Joaquin Quiñonero Candela)}
diff --git a/R-package/man/xgb.importance.Rd b/R-package/man/xgb.importance.Rd
index 1f845a1f9..4157d2181 100644
--- a/R-package/man/xgb.importance.Rd
+++ b/R-package/man/xgb.importance.Rd
@@ -27,7 +27,7 @@ Create a \code{data.table} of the most important features of a model.
 \details{
 This is the function to understand the model trained (and through your model, your data).
 
-Results are returned for both linear and tree models.
+This function works for both linear and tree models.
 
 \code{data.table} is returned by the function.
 The columns are :
@@ -38,8 +38,9 @@
 \item \code{Weight} percentage representing the relative number of times a feature have been taken into trees.
 }
 
-If you don't provide name, index of the features are used.
-They are extracted from the boost dump (made on the C++ side), the index starts at 0 (usual in C++) instead of 1 (usual in R).
+If you don't provide \code{feature_names}, the index of the features will be used instead.
+
+Because the index is extracted from the model dump (made on the C++ side), it starts at 0 (as usual in C++) instead of 1 (as usual in R).
 
 Co-occurence count
 ------------------
@@ -53,10 +54,6 @@ If you need to remember one thing only: until you want to leave us early, don't
 \examples{
 data(agaricus.train, package='xgboost')
 
-# Both dataset are list with two items, a sparse matrix and labels
-# (labels = outcome column which will be learned).
-# Each column of the sparse Matrix is a feature in one hot encoding format.
-
 bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max.depth = 2,
                eta = 1, nthread = 2, nround = 2,objective = "binary:logistic")
 
diff --git a/R-package/man/xgb.plot.deepness.Rd b/R-package/man/xgb.plot.deepness.Rd
index 6488514dd..c8ed130e2 100644
--- a/R-package/man/xgb.plot.deepness.Rd
+++ b/R-package/man/xgb.plot.deepness.Rd
@@ -18,6 +18,7 @@ Generate a graph to plot the distribution of deepness among trees.
 \details{
 Display both the number of \code{leaf} and the distribution of \code{weighted observations}
 by tree deepness level.
+
 The purpose of this function is to help the user to find the best trade-off to set
 the \code{max.depth} and \code{min_child_weight} parameters according to the bias / variance trade-off.
 
@@ -30,7 +31,7 @@ The graph is made of two parts:
 \item Weighted cover: noramlized weighted cover per Leaf (weighted number of instances).
 }
 
-This function is inspired by this blog post \url{http://aysent.github.io/2015/11/08/random-forest-leaf-visualization.html}
+This function is inspired by the blog post \url{http://aysent.github.io/2015/11/08/random-forest-leaf-visualization.html}
 }
 \examples{
 data(agaricus.train, package='xgboost')
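
Note (not part of the patch): a minimal R sketch of the behaviour the reworded xgb.importance documentation describes. It relies only on what the patch itself shows: the xgb.importance(feature_names, model, ...) signature visible in the hunk header and the xgboost() call from the \examples block. The colnames() lookup is an illustrative assumption about how callers typically supply feature names.

library(xgboost)
data(agaricus.train, package = 'xgboost')

bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max.depth = 2,
               eta = 1, nthread = 2, nround = 2, objective = "binary:logistic")

# With feature_names: the Feature column contains the column names of the sparse matrix.
xgb.importance(feature_names = colnames(agaricus.train$data), model = bst)

# Without feature_names: the Feature column contains 0-based feature indices,
# as extracted from the model dump made on the C++ side.
xgb.importance(model = bst)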