From b8711226e2f121a7cb76200716421ee840c0f900 Mon Sep 17 00:00:00 2001
From: Vadim Khotilovich <khotilovich@gmail.com>
Date: Thu, 2 Apr 2015 19:48:23 -0500
Subject: [PATCH 1/6] added an option for stratified CV to xgb.cv

---
 R-package/R/utils.R  | 77 ++++++++++++++++++++++++++++++++++++++++----
 R-package/R/xgb.cv.R | 11 ++++---
 2 files changed, 76 insertions(+), 12 deletions(-)

diff --git a/R-package/R/utils.R b/R-package/R/utils.R
index 7336ed213..b0b565a3d 100644
--- a/R-package/R/utils.R
+++ b/R-package/R/utils.R
@@ -214,18 +214,30 @@ xgb.iter.eval <- function(booster, watchlist, iter, feval = NULL, prediction = F
 #------------------------------------------
 # helper functions for cross validation
 #
-xgb.cv.mknfold <- function(dall, nfold, param) {
+xgb.cv.mknfold <- function(dall, nfold, param, stratified) {
   if (nfold <= 1) {
     stop("nfold must be bigger than 1")
   }
   randidx <- sample(1 : xgb.numrow(dall))
-  kstep <- length(randidx) %/% nfold
-  idset <- list()
-  for (i in 1:(nfold-1)) {
-    idset[[i]] = randidx[1:kstep]
-    randidx = setdiff(randidx,idset[[i]])
+  y <- getinfo(dall, 'label')
+  if (stratified & length(y) == length(randidx)) {
+    y <- y[randidx]
+    # by default assume that y is a classification label, and only
+    # leave it numeric for the reg:linear objective
+    # WARNING: if there would be any objectives with truly numerical 
+    #  they would not currently be treated correctly.
+    if (param[['objective']] != 'reg:linear') y <- factor(y)
+    idset <- xgb.createFolds(y, nfold)
+  } else { 
+    # make simple non-stratified folds
+    kstep <- length(randidx) %/% nfold
+    idset <- list()
+    for (i in 1:(nfold-1)) {
+      idset[[i]] = randidx[1:kstep]
+      randidx = setdiff(randidx,idset[[i]])
+    }
+    idset[[nfold]] = randidx
   }
-  idset[[nfold]] = randidx
   ret <- list()
   for (k in 1:nfold) {
     dtest <- slice(dall, idset[[k]])
@@ -242,6 +254,7 @@ xgb.cv.mknfold <- function(dall, nfold, param) {
   }
   return (ret)
 }
+
 xgb.cv.aggcv <- function(res, showsd = TRUE) {
   header <- res[[1]]
   ret <- header[1]
@@ -261,3 +274,53 @@ xgb.cv.aggcv <- function(res, showsd = TRUE) {
   }
   return (ret)
 }
+
+# Shamelessly copied from caret::createFolds
+# and simplified by always returning an unnamed list of test indices
+xgb.createFolds <- function(y, k = 10)
+{
+  if(is.numeric(y)) {
+    ## Group the numeric data based on their magnitudes
+    ## and sample within those groups.
+
+    ## When the number of samples is low, we may have
+    ## issues further slicing the numeric data into
+    ## groups. The number of groups will depend on the
+    ## ratio of the number of folds to the sample size.
+    ## At most, we will use quantiles. If the sample
+    ## is too small, we just do regular unstratified
+    ## CV
+    cuts <- floor(length(y)/k)
+    if(cuts < 2) cuts <- 2
+    if(cuts > 5) cuts <- 5
+    y <- cut(y,
+             unique(quantile(y, probs = seq(0, 1, length = cuts))),
+             include.lowest = TRUE)
+  }
+
+  if(k < length(y)) {
+    ## reset levels so that the possible levels and
+    ## the levels in the vector are the same
+    y <- factor(as.character(y))
+    numInClass <- table(y)
+    foldVector <- vector(mode = "integer", length(y))
+    
+    ## For each class, balance the fold allocation as far
+    ## as possible, then resample the remainder.
+    ## The final assignment of folds is also randomized.
+    for(i in 1:length(numInClass)) {
+      ## create a vector of integers from 1:k as many times as possible without
+      ## going over the number of samples in the class. Note that if the number
+      ## of samples in a class is less than k, nothing is producd here.
+      seqVector <- rep(1:k, numInClass[i] %/% k)
+      ## add enough random integers to get  length(seqVector) == numInClass[i]
+      if(numInClass[i] %% k > 0) seqVector <- c(seqVector, sample(1:k, numInClass[i] %% k))
+      ## shuffle the integers for fold assignment and assign to this classes's data
+      foldVector[which(y == dimnames(numInClass)$y[i])] <- sample(seqVector)
+    }
+  } else foldVector <- seq(along = y)
+
+  out <- split(seq(along = y), foldVector)
+  names(out) <- NULL
+  out
+}
diff --git a/R-package/R/xgb.cv.R b/R-package/R/xgb.cv.R
index 3472ead4d..38f70b973 100644
--- a/R-package/R/xgb.cv.R
+++ b/R-package/R/xgb.cv.R
@@ -46,11 +46,12 @@
 #'   \item \code{merror} Exact matching error, used to evaluate multi-class classification
 #' }
 #' @param obj customized objective function. Returns gradient and second order 
-#'   gradient with given prediction and dtrain, 
+#'   gradient with given prediction and dtrain.
 #' @param feval custimized evaluation function. Returns 
 #'   \code{list(metric='metric-name', value='metric-value')} with given 
-#'   prediction and dtrain,
-#' @param verbose \code{boolean}, print the statistics during the process.
+#'   prediction and dtrain.
+#' @param stratified \code{boolean}, whether the sampling of folds should be stratified by the values of labels in \code{data}
+#' @param verbose \code{boolean}, print the statistics during the process
 #' @param ... other parameters to pass to \code{params}.
 #' 
 #' @return A \code{data.table} with each mean and standard deviation stat for training set and test set.
@@ -76,7 +77,7 @@
 #'
 xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing = NULL, 
                    prediction = FALSE, showsd = TRUE, metrics=list(), 
-                   obj = NULL, feval = NULL, verbose = T,...) {
+                   obj = NULL, feval = NULL, stratified = TRUE, verbose = T,...) {
   if (typeof(params) != "list") {
     stop("xgb.cv: first argument params must be list")
   }
@@ -94,7 +95,7 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing =
     params <- append(params, list("eval_metric"=mc))
   }
 
-  folds <- xgb.cv.mknfold(dtrain, nfold, params)
+  folds <- xgb.cv.mknfold(dtrain, nfold, params, stratified)
   obj_type = params[['objective']]
   mat_pred = FALSE
   if (!is.null(obj_type) && obj_type=='multi:softprob')

From 611d69c771f5fad6eedec598839fc5175f83957e Mon Sep 17 00:00:00 2001
From: Vadim Khotilovich <khotilovich@gmail.com>
Date: Thu, 2 Apr 2015 19:59:06 -0500
Subject: [PATCH 2/6] fix some wording

---
 R-package/R/utils.R | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/R-package/R/utils.R b/R-package/R/utils.R
index b0b565a3d..ca32901dc 100644
--- a/R-package/R/utils.R
+++ b/R-package/R/utils.R
@@ -222,10 +222,10 @@ xgb.cv.mknfold <- function(dall, nfold, param, stratified) {
   y <- getinfo(dall, 'label')
   if (stratified & length(y) == length(randidx)) {
     y <- y[randidx]
-    # by default assume that y is a classification label, and only
-    # leave it numeric for the reg:linear objective
-    # WARNING: if there would be any objectives with truly numerical 
-    #  they would not currently be treated correctly.
+    # By default assume that y is a classification label,
+    # and only leave it numeric for the reg:linear objective.
+    # WARNING: if there would be any other objectives with truly
+    #   numerical labels, they currently would not be treated correctly!
     if (param[['objective']] != 'reg:linear') y <- factor(y)
     idset <- xgb.createFolds(y, nfold)
   } else { 

From b04920d8e7e1bda51077c07e7feb7a187627b209 Mon Sep 17 00:00:00 2001
From: Vadim Khotilovich <khotilovich@gmail.com>
Date: Fri, 3 Apr 2015 11:14:09 -0500
Subject: [PATCH 3/6] update documentation for xgb.cv

---
 R-package/man/xgb.cv.Rd | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/R-package/man/xgb.cv.Rd b/R-package/man/xgb.cv.Rd
index 8b65d9d4d..6f8ee06cb 100644
--- a/R-package/man/xgb.cv.Rd
+++ b/R-package/man/xgb.cv.Rd
@@ -6,7 +6,7 @@
 \usage{
 xgb.cv(params = list(), data, nrounds, nfold, label = NULL,
   missing = NULL, prediction = FALSE, showsd = TRUE, metrics = list(),
-  obj = NULL, feval = NULL, verbose = T, ...)
+  obj = NULL, feval = NULL, stratified = TRUE, verbose = T, ...)
 }
 \arguments{
 \item{params}{the list of parameters. Commonly used ones are:
@@ -51,13 +51,15 @@ value that represents missing value. Sometime a data use 0 or other extreme valu
 }}
 
 \item{obj}{customized objective function. Returns gradient and second order
-gradient with given prediction and dtrain,}
+gradient with given prediction and dtrain.}
 
 \item{feval}{custimized evaluation function. Returns
 \code{list(metric='metric-name', value='metric-value')} with given
-prediction and dtrain,}
+prediction and dtrain.}
 
-\item{verbose}{\code{boolean}, print the statistics during the process.}
+\item{stratified}{\code{boolean}, whether the sampling of folds should be stratified by the values of labels in \code{data}}
+
+\item{verbose}{\code{boolean}, print the statistics during the process}
 
 \item{...}{other parameters to pass to \code{params}.}
 }

From 271e8202a7b077f9a5afab25eca3d67471303878 Mon Sep 17 00:00:00 2001
From: Vadim Khotilovich <khotilovich@gmail.com>
Date: Fri, 3 Apr 2015 12:20:34 -0500
Subject: [PATCH 4/6] force xgb.cv to return numeric performance values instead
 of character; update its docs

---
 R-package/R/xgb.cv.R    | 15 +++++++++++----
 R-package/man/xgb.cv.Rd | 10 ++++++++--
 2 files changed, 19 insertions(+), 6 deletions(-)

diff --git a/R-package/R/xgb.cv.R b/R-package/R/xgb.cv.R
index 38f70b973..db2ecf103 100644
--- a/R-package/R/xgb.cv.R
+++ b/R-package/R/xgb.cv.R
@@ -50,12 +50,19 @@
 #' @param feval custimized evaluation function. Returns 
 #'   \code{list(metric='metric-name', value='metric-value')} with given 
 #'   prediction and dtrain.
-#' @param stratified \code{boolean}, whether the sampling of folds should be stratified by the values of labels in \code{data}
+#' @param stratified \code{boolean}, whether sampling of folds should be stratified by the values of labels in \code{data}
 #' @param verbose \code{boolean}, print the statistics during the process
 #' @param ... other parameters to pass to \code{params}.
 #' 
-#' @return A \code{data.table} with each mean and standard deviation stat for training set and test set.
-#' 
+#' @return
+#' If \code{prediction = TRUE}, a list with the following elements is returned:
+#' \itemize{
+#'   \item \code{dt} a \code{data.table} with each mean and standard deviation stat for training set and test set
+#'   \item \code{pred} an array or matrix (for multiclass classification) with predictions for each CV-fold for the model having been trained on the data in all other folds.
+#' }
+#'
+#' If \code{prediction = FALSE}, just a \code{data.table} with each mean and standard deviation stat for training set and test set is returned.
+#'
 #' @details 
 #' The original sample is randomly partitioned into \code{nfold} equal size subsamples. 
 #' 
@@ -148,7 +155,7 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing =
   dt <- read.table(text = "", colClasses = type, col.names = colnames) %>% as.data.table
   split <- str_split(string = history, pattern = "\t")
   
-  for(line in split) dt <- line[2:length(line)] %>% str_extract_all(pattern = "\\d*\\.+\\d*") %>% unlist %>% as.list %>% {vec <- .; rbindlist(list(dt, vec), use.names = F, fill = F)}
+  for(line in split) dt <- line[2:length(line)] %>% str_extract_all(pattern = "\\d*\\.+\\d*") %>% unlist %>% as.numeric %>% as.list %>% {rbindlist(list(dt, .), use.names = F, fill = F)}
   
   if (prediction) {
     return(list(dt = dt,pred = predictValues))
diff --git a/R-package/man/xgb.cv.Rd b/R-package/man/xgb.cv.Rd
index 6f8ee06cb..20423f76a 100644
--- a/R-package/man/xgb.cv.Rd
+++ b/R-package/man/xgb.cv.Rd
@@ -57,14 +57,20 @@ gradient with given prediction and dtrain.}
 \code{list(metric='metric-name', value='metric-value')} with given
 prediction and dtrain.}
 
-\item{stratified}{\code{boolean}, whether the sampling of folds should be stratified by the values of labels in \code{data}}
+\item{stratified}{\code{boolean}, whether sampling of folds should be stratified by the values of labels in \code{data}}
 
 \item{verbose}{\code{boolean}, print the statistics during the process}
 
 \item{...}{other parameters to pass to \code{params}.}
 }
 \value{
-A \code{data.table} with each mean and standard deviation stat for training set and test set.
+If \code{prediction = TRUE}, a list with the following elements is returned:
+\itemize{
+  \item \code{dt} a \code{data.table} with each mean and standard deviation stat for training set and test set
+  \item \code{pred} an array or matrix (for multiclass classification) with predictions for each CV-fold for the model having been trained on the data in all other folds.
+}
+
+If \code{prediction = FALSE}, just a \code{data.table} with each mean and standard deviation stat for training set and test set is returned.
 }
 \description{
 The cross valudation function of xgboost

From 31b0e53cd462460ff1b80e281872dd3c3903f3fd Mon Sep 17 00:00:00 2001
From: Vadim Khotilovich <khotilovich@gmail.com>
Date: Fri, 3 Apr 2015 13:24:04 -0500
Subject: [PATCH 5/6] make it possible to use a list of pre-defined CV folds in
 xgb.cv

---
 R-package/R/utils.R     | 46 +++++++++++++++++++++--------------------
 R-package/R/xgb.cv.R    | 16 ++++++++++----
 R-package/man/xgb.cv.Rd |  8 +++++--
 3 files changed, 42 insertions(+), 28 deletions(-)

diff --git a/R-package/R/utils.R b/R-package/R/utils.R
index ca32901dc..a9ea767a8 100644
--- a/R-package/R/utils.R
+++ b/R-package/R/utils.R
@@ -214,43 +214,45 @@ xgb.iter.eval <- function(booster, watchlist, iter, feval = NULL, prediction = F
 #------------------------------------------
 # helper functions for cross validation
 #
-xgb.cv.mknfold <- function(dall, nfold, param, stratified) {
+xgb.cv.mknfold <- function(dall, nfold, param, stratified, folds) {
   if (nfold <= 1) {
     stop("nfold must be bigger than 1")
   }
-  randidx <- sample(1 : xgb.numrow(dall))
-  y <- getinfo(dall, 'label')
-  if (stratified & length(y) == length(randidx)) {
-    y <- y[randidx]
-    # By default assume that y is a classification label,
-    # and only leave it numeric for the reg:linear objective.
-    # WARNING: if there would be any other objectives with truly
-    #   numerical labels, they currently would not be treated correctly!
-    if (param[['objective']] != 'reg:linear') y <- factor(y)
-    idset <- xgb.createFolds(y, nfold)
-  } else { 
-    # make simple non-stratified folds
-    kstep <- length(randidx) %/% nfold
-    idset <- list()
-    for (i in 1:(nfold-1)) {
-      idset[[i]] = randidx[1:kstep]
-      randidx = setdiff(randidx,idset[[i]])
+  if(is.null(folds)) {
+    y <- getinfo(dall, 'label')
+    randidx <- sample(1 : xgb.numrow(dall))
+    if (stratified & length(y) == length(randidx)) {
+      y <- y[randidx]
+      # By default assume that y is a classification label,
+      # and only leave it numeric for the reg:linear objective.
+      # WARNING: if there would be any other objectives with truly
+      #   numerical labels, they currently would not be treated correctly!
+      if (param[['objective']] != 'reg:linear') y <- factor(y)
+      folds <- xgb.createFolds(y, nfold)
+    } else { 
+      # make simple non-stratified folds
+      kstep <- length(randidx) %/% nfold
+      folds <- list()
+      for (i in 1:(nfold-1)) {
+        folds[[i]] = randidx[1:kstep]
+        randidx = setdiff(randidx, folds[[i]])
+      }
+      folds[[nfold]] = randidx
     }
-    idset[[nfold]] = randidx
   }
   ret <- list()
   for (k in 1:nfold) {
-    dtest <- slice(dall, idset[[k]])
+    dtest <- slice(dall, folds[[k]])
     didx = c()
     for (i in 1:nfold) {
       if (i != k) {
-        didx <- append(didx, idset[[i]])
+        didx <- append(didx, folds[[i]])
       }
     }
     dtrain <- slice(dall, didx)
     bst <- xgb.Booster(param, list(dtrain, dtest))
     watchlist = list(train=dtrain, test=dtest)
-    ret[[k]] <- list(dtrain=dtrain, booster=bst, watchlist=watchlist, index=idset[[k]])
+    ret[[k]] <- list(dtrain=dtrain, booster=bst, watchlist=watchlist, index=folds[[k]])
   }
   return (ret)
 }
diff --git a/R-package/R/xgb.cv.R b/R-package/R/xgb.cv.R
index db2ecf103..e5f5c7b72 100644
--- a/R-package/R/xgb.cv.R
+++ b/R-package/R/xgb.cv.R
@@ -50,7 +50,9 @@
 #' @param feval custimized evaluation function. Returns 
 #'   \code{list(metric='metric-name', value='metric-value')} with given 
 #'   prediction and dtrain.
-#' @param stratified \code{boolean}, whether sampling of folds should be stratified by the values of labels in \code{data}
+#' @param stratified \code{boolean} whether sampling of folds should be stratified by the values of labels in \code{data}
+#' @param folds \code{list} provides a possibility of using a list of pre-defined CV folds (each element must be a vector of fold's indices).
+#'   If folds are supplied, the nfold and stratified parameters would be ignored.
 #' @param verbose \code{boolean}, print the statistics during the process
 #' @param ... other parameters to pass to \code{params}.
 #' 
@@ -84,10 +86,16 @@
 #'
 xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing = NULL, 
                    prediction = FALSE, showsd = TRUE, metrics=list(), 
-                   obj = NULL, feval = NULL, stratified = TRUE, verbose = T,...) {
+                   obj = NULL, feval = NULL, stratified = TRUE, folds = NULL, verbose = T,...) {
   if (typeof(params) != "list") {
     stop("xgb.cv: first argument params must be list")
   }
+  if(!is.null(folds)) {
+    if(class(folds)!="list" | length(folds) < 2) {
+      stop("folds must be a list with 2 or more elements that are vectors of indices for each CV-fold")
+    }
+    nfold <- length(folds)
+  }
   if (nfold <= 1) {
     stop("nfold must be bigger than 1")
   }
@@ -102,7 +110,7 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing =
     params <- append(params, list("eval_metric"=mc))
   }
 
-  folds <- xgb.cv.mknfold(dtrain, nfold, params, stratified)
+  xgb_folds <- xgb.cv.mknfold(dtrain, nfold, params, stratified, folds)
   obj_type = params[['objective']]
   mat_pred = FALSE
   if (!is.null(obj_type) && obj_type=='multi:softprob')
@@ -119,7 +127,7 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing =
   for (i in 1:nrounds) {
     msg <- list()
     for (k in 1:nfold) {
-      fd <- folds[[k]]
+      fd <- xgb_folds[[k]]
       succ <- xgb.iter.update(fd$booster, fd$dtrain, i - 1, obj)
       if (i<nrounds) {
           msg[[k]] <- xgb.iter.eval(fd$booster, fd$watchlist, i - 1, feval) %>% str_split("\t") %>% .[[1]]
diff --git a/R-package/man/xgb.cv.Rd b/R-package/man/xgb.cv.Rd
index 20423f76a..feee4e18f 100644
--- a/R-package/man/xgb.cv.Rd
+++ b/R-package/man/xgb.cv.Rd
@@ -6,7 +6,8 @@
 \usage{
 xgb.cv(params = list(), data, nrounds, nfold, label = NULL,
   missing = NULL, prediction = FALSE, showsd = TRUE, metrics = list(),
-  obj = NULL, feval = NULL, stratified = TRUE, verbose = T, ...)
+  obj = NULL, feval = NULL, stratified = TRUE, folds = NULL,
+  verbose = T, ...)
 }
 \arguments{
 \item{params}{the list of parameters. Commonly used ones are:
@@ -57,7 +58,10 @@ gradient with given prediction and dtrain.}
 \code{list(metric='metric-name', value='metric-value')} with given
 prediction and dtrain.}
 
-\item{stratified}{\code{boolean}, whether sampling of folds should be stratified by the values of labels in \code{data}}
+\item{stratified}{\code{boolean} whether sampling of folds should be stratified by the values of labels in \code{data}}
+
+\item{folds}{\code{list} provides a possibility of using a list of pre-defined CV folds (each element must be a vector of fold's indices).
+If folds are supplied, the nfold and stratified parameters would be ignored.}
 
 \item{verbose}{\code{boolean}, print the statistics during the process}
 

From aefd234da3ed64164259254282991a251185c061 Mon Sep 17 00:00:00 2001
From: Vadim Khotilovich <khotilovich@gmail.com>
Date: Tue, 7 Apr 2015 17:43:53 -0500
Subject: [PATCH 6/6] moved the external graphing packages to Suggested in
 order to trim the dependencies

---
 R-package/DESCRIPTION             | 13 +++++++------
 R-package/NAMESPACE               | 12 ------------
 R-package/R/xgb.plot.importance.R | 21 ++++++++-------------
 R-package/R/xgb.plot.tree.R       |  9 ++++++---
 4 files changed, 21 insertions(+), 34 deletions(-)

diff --git a/R-package/DESCRIPTION b/R-package/DESCRIPTION
index 6de75f930..3d5319411 100644
--- a/R-package/DESCRIPTION
+++ b/R-package/DESCRIPTION
@@ -18,7 +18,12 @@ License: Apache License (== 2.0) | file LICENSE
 URL: https://github.com/dmlc/xgboost
 BugReports: https://github.com/dmlc/xgboost/issues
 VignetteBuilder: knitr
-Suggests: knitr
+Suggests:
+    knitr,
+    ggplot2 (>= 1.0.0),
+    DiagrammeR (>= 0.4),
+    Ckmeans.1d.dp (>= 3.3.1),
+    vcd (>= 1.3)
 Depends:
     R (>= 2.10)
 Imports:
@@ -26,8 +31,4 @@ Imports:
     methods,
     data.table (>= 1.9.4),
     magrittr (>= 1.5),
-    stringr (>= 0.6.2),
-    DiagrammeR (>= 0.4),
-    ggplot2 (>= 1.0.0),
-    Ckmeans.1d.dp (>= 3.3.1),
-    vcd (>= 1.3)
+    stringr (>= 0.6.2)
diff --git a/R-package/NAMESPACE b/R-package/NAMESPACE
index a1d296d80..d7f9e455c 100644
--- a/R-package/NAMESPACE
+++ b/R-package/NAMESPACE
@@ -21,8 +21,6 @@ exportMethods(predict)
 import(methods)
 importClassesFrom(Matrix,dgCMatrix)
 importClassesFrom(Matrix,dgeMatrix)
-importFrom(Ckmeans.1d.dp,Ckmeans.1d.dp)
-importFrom(DiagrammeR,mermaid)
 importFrom(Matrix,cBind)
 importFrom(Matrix,colSums)
 importFrom(Matrix,sparseVector)
@@ -34,16 +32,6 @@ importFrom(data.table,fread)
 importFrom(data.table,rbindlist)
 importFrom(data.table,set)
 importFrom(data.table,setnames)
-importFrom(ggplot2,aes)
-importFrom(ggplot2,coord_flip)
-importFrom(ggplot2,element_blank)
-importFrom(ggplot2,element_text)
-importFrom(ggplot2,geom_bar)
-importFrom(ggplot2,ggplot)
-importFrom(ggplot2,ggtitle)
-importFrom(ggplot2,theme)
-importFrom(ggplot2,xlab)
-importFrom(ggplot2,ylab)
 importFrom(magrittr,"%>%")
 importFrom(magrittr,add)
 importFrom(magrittr,not)
diff --git a/R-package/R/xgb.plot.importance.R b/R-package/R/xgb.plot.importance.R
index 66dcadaec..3efc3fda9 100644
--- a/R-package/R/xgb.plot.importance.R
+++ b/R-package/R/xgb.plot.importance.R
@@ -2,17 +2,6 @@
 #' 
 #' Read a data.table containing feature importance details and plot it.
 #' 
-#' @importFrom ggplot2 ggplot
-#' @importFrom ggplot2 aes
-#' @importFrom ggplot2 geom_bar
-#' @importFrom ggplot2 coord_flip
-#' @importFrom ggplot2 xlab
-#' @importFrom ggplot2 ylab
-#' @importFrom ggplot2 ggtitle
-#' @importFrom ggplot2 theme
-#' @importFrom ggplot2 element_text
-#' @importFrom ggplot2 element_blank
-#' @importFrom Ckmeans.1d.dp Ckmeans.1d.dp
 #' @importFrom magrittr %>%
 #' @param importance_matrix a \code{data.table} returned by the \code{xgb.importance} function.
 #' @param numberOfClusters a \code{numeric} vector containing the min and the max range of the possible number of clusters of bars.
@@ -44,11 +33,17 @@ xgb.plot.importance <- function(importance_matrix = NULL, numberOfClusters = c(1
   if (!"data.table" %in% class(importance_matrix))  {     
     stop("importance_matrix: Should be a data.table.")
   }
-    
+  if (!require(ggplot2, quietly = TRUE)) {
+    stop("ggplot2 package is required for plotting the importance", call. = FALSE)
+  }
+  if (!requireNamespace("Ckmeans.1d.dp", quietly = TRUE)) {
+    stop("Ckmeans.1d.dp package is required for plotting the importance", call. = FALSE)
+  }
+
   # To avoid issues in clustering when co-occurences are used
   importance_matrix <- importance_matrix[, .(Gain = sum(Gain)), by = Feature]
   
-  clusters <- suppressWarnings(Ckmeans.1d.dp(importance_matrix[,Gain], numberOfClusters))
+  clusters <- suppressWarnings(Ckmeans.1d.dp::Ckmeans.1d.dp(importance_matrix[,Gain], numberOfClusters))
   importance_matrix[,"Cluster":=clusters$cluster %>% as.character]
     
   plot <- ggplot(importance_matrix, aes(x=reorder(Feature, Gain), y = Gain, width= 0.05), environment = environment())+  geom_bar(aes(fill=Cluster), stat="identity", position="identity") + coord_flip() + xlab("Features") + ylab("Gain") + ggtitle("Feature importance") + theme(plot.title = element_text(lineheight=.9, face="bold"), panel.grid.major.y = element_blank() )
diff --git a/R-package/R/xgb.plot.tree.R b/R-package/R/xgb.plot.tree.R
index 7f7d74d57..edcd5f47f 100644
--- a/R-package/R/xgb.plot.tree.R
+++ b/R-package/R/xgb.plot.tree.R
@@ -15,7 +15,6 @@
 #' @importFrom stringr str_split
 #' @importFrom stringr str_extract
 #' @importFrom stringr str_trim
-#' @importFrom DiagrammeR mermaid
 #' @param feature_names names of each feature as a character vector. Can be extracted from a sparse matrix (see example). If model dump already contains feature names, this argument should be \code{NULL}.
 #' @param filename_dump the path to the text file storing the model. Model dump must include the gain per feature and per tree (parameter \code{with.stats = T} in function \code{xgb.dump}). Possible to provide a model directly (see \code{model} argument).
 #' @param model generated by the \code{xgb.train} function. Avoid the creation of a dump file.
@@ -64,7 +63,11 @@ xgb.plot.tree <- function(feature_names = NULL, filename_dump = NULL, model = NU
   if (!class(model) %in% c("xgb.Booster", "NULL")) {
     stop("model: Has to be an object of class xgb.Booster model generaged by the xgb.train function.")
   }
-    
+  
+  if (!requireNamespace("DiagrammeR", quietly = TRUE)) {
+    stop("DiagrammeR package is required for xgb.plot.tree", call. = FALSE)
+  }
+  
   if(is.null(model)){
     allTrees <- xgb.model.dt.tree(feature_names = feature_names, filename_dump = filename_dump, n_first_tree = n_first_tree)  
   } else {
@@ -85,7 +88,7 @@ xgb.plot.tree <- function(feature_names = NULL, filename_dump = NULL, model = NU
   no <- allTrees[Feature!="Leaf", c(No)] %>% paste(collapse = ",") %>% paste("class ", ., " redNode", sep = "")
   
   path <- allTrees[Feature!="Leaf", c(yesPath, noPath)] %>% .[order(.)] %>% paste(sep = "", collapse = ";") %>% paste("graph LR", .,collapse = "", sep = ";") %>% paste(CSSstyle, yes, no, sep = ";")
-  mermaid(path, width, height)
+  DiagrammeR::mermaid(path, width, height)
 }
 
 # Avoid error messages during CRAN check.