diff --git a/R-package/DESCRIPTION b/R-package/DESCRIPTION
index 7fec935f6..0384fc599 100644
--- a/R-package/DESCRIPTION
+++ b/R-package/DESCRIPTION
@@ -25,5 +25,4 @@ Imports:
     data.table (>= 1.9),
     magrittr (>= 1.5),
     stringr,
-    DiagrammeR,
-    vcd
+    DiagrammeR
diff --git a/R-package/R/predict.xgb.Booster.R b/R-package/R/predict.xgb.Booster.R
index 15af9f34d..122e116c7 100644
--- a/R-package/R/predict.xgb.Booster.R
+++ b/R-package/R/predict.xgb.Booster.R
@@ -11,10 +11,10 @@ setClass("xgb.Booster")
 #' value of sum of functions, when outputmargin=TRUE, the prediction is
 #' untransformed margin value. In logistic regression, outputmargin=T will
 #' output value before logistic transformation.
-#' @param predleaf whether predict leaf index instead. If set to TRUE, the output will be a matrix object.
 #' @param ntreelimit limit number of trees used in prediction, this parameter is
 #' only valid for gbtree, but not for gblinear. set it to be value bigger
 #' than 0. It will use all trees by default.
+#' @param predleaf whether to predict the leaf index instead. If set to TRUE, the output will be a matrix object.
 #' @examples
 #' data(agaricus.train, package='xgboost')
 #' data(agaricus.test, package='xgboost')
diff --git a/R-package/R/xgb.cv.R b/R-package/R/xgb.cv.R
index 988f67470..0aae574fb 100644
--- a/R-package/R/xgb.cv.R
+++ b/R-package/R/xgb.cv.R
@@ -32,7 +32,7 @@
 #' @param nfold number of folds used
 #' @param label option field, when data is Matrix
 #' @param missing Missing is only used when input is dense matrix, pick a float
-# value that represents missing value. Sometime a data use 0 or other extreme value to represents missing values.
+#' value that represents missing values. Sometimes a dataset uses 0 or another extreme value to represent missing values.
 #' @param prediction A logical value indicating whether to return the prediction vector.
 #' @param showsd \code{boolean}, whether show standard deviation of cross validation
 #' @param metrics, list of evaluation metrics to be used in corss validation,
diff --git a/R-package/R/xgb.dump.R b/R-package/R/xgb.dump.R
index b6c829663..7658557dd 100644
--- a/R-package/R/xgb.dump.R
+++ b/R-package/R/xgb.dump.R
@@ -29,7 +29,7 @@
 #' bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
 #' eta = 1, nround = 2,objective = "binary:logistic")
 #' # save the model in file 'xgb.model.dump'
-#' xgb.dump(bst, 'xgb.model.dump', with.stats = T)
+#' xgb.dump(bst, 'xgb.model.dump', with.stats = TRUE)
 #'
 #' # print the model without saving it to a file
 #' print(xgb.dump(bst))
@@ -54,4 +54,4 @@ xgb.dump <- function(model = NULL, fname = NULL, fmap = "", with.stats=FALSE) {
     result %>% str_split("\n") %>% unlist %>% Filter(function(x) x != "", .) %>% writeLines(fname)
     return(TRUE)
   }
-}
\ No newline at end of file
+}
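Since the xgb.dump() example above now uses with.stats = TRUE, here is a minimal sketch of the same call when the dump is returned to the session rather than written to a file; it assumes only the agaricus data already shipped with the package:

require(xgboost)
data(agaricus.train, package = 'xgboost')
train <- agaricus.train
bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
               eta = 1, nround = 2, objective = "binary:logistic")
# with.stats = TRUE appends the split statistics (gain, cover) to each node
# of the text dump; omitting fname returns the dump instead of saving it.
dump <- xgb.dump(bst, with.stats = TRUE)
head(dump)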
diff --git a/R-package/R/xgb.importance.R b/R-package/R/xgb.importance.R
index 69715d3cb..c2688848b 100644
--- a/R-package/R/xgb.importance.R
+++ b/R-package/R/xgb.importance.R
@@ -32,7 +32,8 @@
 #' data(agaricus.train, package='xgboost')
 #' data(agaricus.test, package='xgboost')
 #'
-#' #Both dataset are list with two items, a sparse matrix and labels (labels = outcome column which will be learned).
+#' #Both datasets are lists with two items, a sparse matrix and labels
+#' #(labels = outcome column which will be learned).
 #' #Each column of the sparse Matrix is a feature in one hot encoding format.
 #' train <- agaricus.train
 #' test <- agaricus.test
diff --git a/R-package/R/xgb.model.dt.tree.R b/R-package/R/xgb.model.dt.tree.R
index 87b9f3a99..b67597126 100644
--- a/R-package/R/xgb.model.dt.tree.R
+++ b/R-package/R/xgb.model.dt.tree.R
@@ -42,7 +42,8 @@
 #' @examples
 #' data(agaricus.train, package='xgboost')
 #'
-#' #Both dataset are list with two items, a sparse matrix and labels (labels = outcome column which will be learned).
+#' #Both datasets are lists with two items, a sparse matrix and labels
+#' #(labels = outcome column which will be learned).
 #' #Each column of the sparse Matrix is a feature in one hot encoding format.
 #' train <- agaricus.train
 #'
diff --git a/R-package/R/xgb.plot.tree.R b/R-package/R/xgb.plot.tree.R
index 01261fab3..443446916 100644
--- a/R-package/R/xgb.plot.tree.R
+++ b/R-package/R/xgb.plot.tree.R
@@ -42,7 +42,8 @@
 #' @examples
 #' data(agaricus.train, package='xgboost')
 #'
-#' #Both dataset are list with two items, a sparse matrix and labels (labels = outcome column which will be learned).
+#' #Both datasets are lists with two items, a sparse matrix and labels
+#' #(labels = outcome column which will be learned).
 #' #Each column of the sparse Matrix is a feature in one hot encoding format.
 #' train <- agaricus.train
 #'
diff --git a/R-package/data/agaricus.test.rda b/R-package/data/agaricus.test.rda
index bffe6de21..ad8d50af7 100644
Binary files a/R-package/data/agaricus.test.rda and b/R-package/data/agaricus.test.rda differ
diff --git a/R-package/data/agaricus.train.rda b/R-package/data/agaricus.train.rda
index c471d0173..3f5f24144 100644
Binary files a/R-package/data/agaricus.train.rda and b/R-package/data/agaricus.train.rda differ
diff --git a/R-package/demo/00Index b/R-package/demo/00Index
index 345d7ca4f..969da0d91 100644
--- a/R-package/demo/00Index
+++ b/R-package/demo/00Index
@@ -4,4 +4,5 @@ boost_from_prediction Boosting from existing prediction
 predict_first_ntree Predicting using first n trees
 generalized_linear_model Generalized Linear Model
 cross_validation Cross validation
-create_sparse_matrix
+create_sparse_matrix Create Sparse Matrix
+predict_leaf_indices Predicting the corresponding leaves
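The predict_leaf_indices demo registered above is not included in this diff, so the following is only a hedged sketch of what it might contain, built from the predleaf parameter documented in predict.xgb.Booster.R:

require(xgboost)
data(agaricus.train, package = 'xgboost')
data(agaricus.test, package = 'xgboost')
bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label,
               max.depth = 2, eta = 1, nround = 2, objective = "binary:logistic")
# With predleaf = TRUE the prediction is a matrix with one row per
# observation and one column per tree; each entry is the index of the
# leaf that the observation falls into for that tree.
leaves <- predict(bst, agaricus.test$data, predleaf = TRUE)
dim(leaves)
head(leaves)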
diff --git a/R-package/demo/create_sparse_matrix.R b/R-package/demo/create_sparse_matrix.R
index 4060d1c48..ac96510a3 100644
--- a/R-package/demo/create_sparse_matrix.R
+++ b/R-package/demo/create_sparse_matrix.R
@@ -1,7 +1,7 @@
 require(xgboost)
 require(Matrix)
 require(data.table)
-require(vcd) #Available in Cran. Used for its dataset with categorical values.
+if (!require('vcd')) { install.packages('vcd'); library(vcd) } # Available on CRAN. Used for its dataset with categorical values.
 
 # According to its documentation, Xgboost works only on numbers.
 # Sometimes the dataset we have to work on have categorical data.
@@ -86,4 +86,4 @@ print(chisq.test(df$AgeCat, df$Y))
 
 # As you can see, in general destroying information by simplying it won't improve your model. Chi2 just demonstrates that. But in more complex cases, creating a new feature based on existing one which makes link with the outcome more obvious may help the algorithm and improve the model. The case studied here is not enough complex to show that. Check Kaggle forum for some challenging datasets. 
 # However it's almost always worse when you add some arbitrary rules.
-# Moreover, you can notice that even if we have added some not useful new features highly correlated with other features, the boosting tree algorithm have been able to choose the best one, which in this case is the Age. Linear model may not be that strong in these scenario.
\ No newline at end of file
+# Moreover, you can notice that even though we added some new features that are not very useful and highly correlated with other features, the boosting tree algorithm was still able to choose the best one, which in this case is Age. A linear model may not be that strong in this scenario.
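To make the closing comments of create_sparse_matrix.R concrete, here is a small hedged illustration of the chi-squared comparison they describe; it assumes vcd's Arthritis dataset (the one the demo loads), and AgeCat is a hypothetical stand-in for the demo's simplified age feature:

if (!require('vcd')) { install.packages('vcd'); library(vcd) }
data(Arthritis)
df <- Arthritis
df$Y <- df$Improved == "Marked"        # binary outcome, as in the demo
df$AgeCat <- as.factor(df$Age > 30)    # an arbitrary simplification of Age
# Destroying information rarely helps: the raw feature usually shows at
# least as strong an association with the outcome as the binned one.
print(chisq.test(df$Age, df$Y))
print(chisq.test(df$AgeCat, df$Y))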