diff --git a/R-package/R/utils.R b/R-package/R/utils.R
index 79d438917..1ae822eed 100644
--- a/R-package/R/utils.R
+++ b/R-package/R/utils.R
@@ -304,6 +304,7 @@ depr_par_lut <- matrix(c(
   'features.keep', 'features_keep',
   'plot.height','plot_height',
   'plot.width','plot_width',
+  'n_first_tree', 'trees',
   'dummy', 'DUMMY'
 ), ncol=2, byrow = TRUE)
 colnames(depr_par_lut) <- c('old', 'new')
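Reviewer note on the remapping above: check.deprecation() can only rename the argument, not translate its semantics. Under the removed code path, n_first_tree = 1 kept every tree with Tree <= 1 (i.e., the first two trees), whereas the renamed call arrives as trees = 1 and selects only the single tree with zero-based index 1. A minimal migration sketch (the exact warning text emitted by check.deprecation() is not shown in this patch):

    library(xgboost)
    data(agaricus.train, package = 'xgboost')
    bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label,
                   max_depth = 2, eta = 1, nrounds = 3, objective = "binary:logistic")
    # old-style call: the argument is renamed to `trees` with a deprecation warning,
    # so it now parses only the tree with zero-based index 1
    dt <- xgb.model.dt.tree(model = bst, n_first_tree = 1)
    # to keep the old "first n trees" behavior, pass explicit indices instead
    dt <- xgb.model.dt.tree(model = bst, trees = 0:1)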
diff --git a/R-package/R/xgb.model.dt.tree.R b/R-package/R/xgb.model.dt.tree.R
index 2af98ca87..a364aaa70 100644
--- a/R-package/R/xgb.model.dt.tree.R
+++ b/R-package/R/xgb.model.dt.tree.R
@@ -7,8 +7,12 @@
 #' @param model object of class \code{xgb.Booster}
 #' @param text \code{character} vector previously generated by the \code{xgb.dump}
 #'        function (where parameter \code{with_stats = TRUE} should have been set).
-#' @param n_first_tree limit the parsing to the \code{n} first trees.
+#' @param trees an integer vector of tree indices that should be parsed.
 #'        If set to \code{NULL}, all trees of the model are parsed.
+#'        This can be useful, e.g., in multiclass classification to get only
+#'        the trees of a certain class. IMPORTANT: the tree index in an xgboost model
+#'        is zero-based (e.g., use \code{trees = 0:4} for the first 5 trees).
+#' @param ... currently not used.
 #'
 #' @return
 #' A \code{data.table} with detailed information about model trees' nodes.
@@ -16,9 +20,9 @@
 #' The columns of the \code{data.table} are:
 #'
 #' \itemize{
-#'   \item \code{Tree}: ID of a tree in a model
-#'   \item \code{Node}: ID of a node in a tree
-#'   \item \code{ID}: unique identifier of a node in a model
+#'   \item \code{Tree}: ID of a tree in a model (integer)
+#'   \item \code{Node}: ID of a node in a tree (integer)
+#'   \item \code{ID}: identifier of a node in a model (character)
 #'   \item \code{Feature}: for a branch node, it's a feature id or name (when available);
 #'         for a leaf node, it simply labels it as \code{'Leaf'}
 #'   \item \code{Split}: location of the split for a branch node (split condition is always "less than")
@@ -47,8 +51,8 @@
 #'
 #' @export
 xgb.model.dt.tree <- function(feature_names = NULL, model = NULL, text = NULL,
-                              n_first_tree = NULL){
-
+                              trees = NULL, ...){
+  check.deprecation(...)
   if (!class(feature_names) %in% c("character", "NULL")) {
     stop("feature_names: Has to be a vector of character\n",
          "  or NULL if the model dump already contains feature names.\n",
@@ -61,8 +65,8 @@ xgb.model.dt.tree <- function(feature_names = NULL, model = NULL, text = NULL,
          "  (or NULL if the model was provided).")
   }
 
-  if (!class(n_first_tree) %in% c("numeric", "NULL") | length(n_first_tree) > 1) {
-    stop("n_first_tree: Has to be a numeric vector of size 1.")
+  if (!class(trees) %in% c("integer", "numeric", "NULL")) {
+    stop("trees: Has to be a vector of integers.")
   }
 
   if (is.null(text)){
@@ -84,10 +88,14 @@ xgb.model.dt.tree <- function(feature_names = NULL, model = NULL, text = NULL,
   td[position, Tree := 1L]
   td[, Tree := cumsum(ifelse(is.na(Tree), 0L, Tree)) - 1L]
 
-  n_first_tree <- min(max(td$Tree), n_first_tree)
-  td <- td[Tree <= n_first_tree & !grepl('^booster', t)]
+  if (is.null(trees)) {
+    trees <- 0:max(td$Tree)
+  } else {
+    trees <- trees[trees >= 0 & trees <= max(td$Tree)]
+  }
+  td <- td[Tree %in% trees & !grepl('^booster', t)]
 
-  td[, Node := stri_match_first_regex(t, "(\\d+):")[,2] %>% as.numeric ]
+  td[, Node := stri_match_first_regex(t, "(\\d+):")[,2] %>% as.integer ]
   td[, ID := add.tree.id(Node, Tree)]
   td[, isLeaf := !is.na(stri_match_first_regex(t, "leaf"))]
@@ -112,7 +120,7 @@ xgb.model.dt.tree <- function(feature_names = NULL, model = NULL, text = NULL,
   }]
 
   # convert some columns to numeric
-  numeric_cols <- c("Quality", "Cover")
+  numeric_cols <- c("Split", "Quality", "Cover")
   td[, (numeric_cols) := lapply(.SD, as.numeric), .SDcols=numeric_cols]
 
   td[, t := NULL]
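The multiclass use case mentioned in the new trees documentation deserves a concrete sketch. Assumptions flagged: bst_multi is a hypothetical booster trained with objective = "multi:softprob" and num_class = 3; xgboost grows one tree per class per boosting round, laid out round-robin, so class k owns the zero-based indices k, k + num_class, k + 2 * num_class, and so on:

    num_class <- 3   # assumed training parameter of `bst_multi`
    nrounds   <- 5   # assumed number of boosting rounds
    # zero-based indices of the trees belonging to class 2:
    trees_class2 <- seq(2, by = num_class, length.out = nrounds)  # 2 5 8 11 14
    dt_class2 <- xgb.model.dt.tree(model = bst_multi, trees = trees_class2)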
diff --git a/R-package/R/xgb.plot.multi.trees.R b/R-package/R/xgb.plot.multi.trees.R
index 4cf5b82a1..3cd565e02 100644
--- a/R-package/R/xgb.plot.multi.trees.R
+++ b/R-package/R/xgb.plot.multi.trees.R
@@ -2,8 +2,8 @@
 #'
 #' Visualization of the ensemble of trees as a single collective unit.
 #'
-#' @param model dump generated by the \code{xgb.train} function.
-#' @param feature_names names of each feature as a \code{character} vector. Can be extracted from a sparse matrix (see example). If model dump already contains feature names, this argument should be \code{NULL}.
+#' @param model produced by the \code{xgb.train} function.
+#' @param feature_names names of each feature as a \code{character} vector.
 #' @param features_keep number of features to keep in each position of the multi trees.
 #' @param plot_width width in pixels of the graph to produce
 #' @param plot_height height in pixels of the graph to produce
@@ -13,21 +13,19 @@
 #'
 #' @details
 #'
-#' This function tries to capture the complexity of gradient boosted tree ensemble
-#' in a cohesive way.
+#' This function tries to capture the complexity of a gradient boosted tree model
+#' in a cohesive way by compressing an ensemble of trees into a single tree-graph representation.
+#' The goal is to improve the interpretability of a model generally seen as a black box.
 #'
-#' The goal is to improve the interpretability of the model generally seen as black box.
-#' The function is dedicated to boosting applied to decision trees only.
-#'
-#' The purpose is to move from an ensemble of trees to a single tree only.
+#' Note: this function is applicable to tree booster-based models only.
 #'
 #' It takes advantage of the fact that the shape of a binary tree is only defined by
-#' its deepness (therefore in a boosting model, all trees have the same shape).
+#' its depth (therefore, in a boosting model, all trees have a similar shape).
 #'
 #' Moreover, the trees tend to reuse the same features.
 #'
-#' The function will project each tree on one, and keep for each position the
-#' \code{features_keep} first features (based on Gain per feature measure).
+#' The function projects each tree onto one, and keeps for each position the
+#' first \code{features_keep} features (ranked by the Gain per feature measure).
 #'
 #' This function is inspired by this blog post:
 #' \url{https://wellecks.wordpress.com/2015/02/21/peering-into-the-black-box-visualizing-lambdamart/}
@@ -70,39 +68,61 @@ xgb.plot.multi.trees <- function(model, feature_names = NULL, features_keep = 5,
   tree.matrix[!is.na(No),No:= paste0(abs.node.position, "_1")]
 
   remove.tree <- . %>% stri_replace_first_regex(pattern = "^\\d+-", replacement = "")
 
-  tree.matrix[,`:=`(abs.node.position=remove.tree(abs.node.position), Yes=remove.tree(Yes), No=remove.tree(No))]
+  tree.matrix[,`:=`(abs.node.position = remove.tree(abs.node.position),
+                    Yes = remove.tree(Yes),
+                    No = remove.tree(No))]
 
-  nodes.dt <- tree.matrix[,.(Quality = sum(Quality)),by = .(abs.node.position, Feature)][,.(Text =paste0(Feature[1:min(length(Feature), features_keep)], " (", Quality[1:min(length(Quality), features_keep)], ")") %>% paste0(collapse = "\n")), by=abs.node.position]
-  edges.dt <- tree.matrix[Feature != "Leaf",.(abs.node.position, Yes)] %>% list(tree.matrix[Feature != "Leaf",.(abs.node.position, No)]) %>% rbindlist() %>% setnames(c("From", "To")) %>% .[,.N,.(From, To)] %>% .[,N:=NULL]
+  nodes.dt <- tree.matrix[
+      , .(Quality = sum(Quality))
+      , by = .(abs.node.position, Feature)
+    ][, .(Text = paste0(Feature[1:min(length(Feature), features_keep)],
+                        " (",
+                        format(Quality[1:min(length(Quality), features_keep)], digits=5),
+                        ")") %>%
+               paste0(collapse = "\n"))
+      , by = abs.node.position]
+
+  edges.dt <- tree.matrix[Feature != "Leaf", .(abs.node.position, Yes)] %>%
+    list(tree.matrix[Feature != "Leaf",.(abs.node.position, No)]) %>%
+    rbindlist() %>%
+    setnames(c("From", "To")) %>%
+    .[, .N, .(From, To)] %>%
+    .[, N:=NULL]
 
-  nodes <- DiagrammeR::create_node_df(n = nrow(nodes.dt),
-                                      label = nodes.dt[,Text],
-                                      style = "filled",
-                                      color = "DimGray",
-                                      fillcolor= "Beige",
-                                      shape = "oval",
-                                      fontname = "Helvetica"
+  nodes <- DiagrammeR::create_node_df(
+    n = nrow(nodes.dt),
+    label = nodes.dt[,Text]
   )
-  edges <- DiagrammeR::create_edge_df(from = match(edges.dt[,From], nodes.dt[,abs.node.position]),
-                                      to = match(edges.dt[,To], nodes.dt[,abs.node.position]),
-                                      color = "DimGray",
-                                      arrowsize = "1.5",
-                                      arrowhead = "vee",
-                                      fontname = "Helvetica",
-                                      rel = "leading_to")
+  edges <- DiagrammeR::create_edge_df(
+    from = match(edges.dt[,From], nodes.dt[,abs.node.position]),
+    to = match(edges.dt[,To], nodes.dt[,abs.node.position]),
+    rel = "leading_to")
 
-  graph <- DiagrammeR::create_graph(nodes_df = nodes,
-                                    edges_df = edges)
+  graph <- DiagrammeR::create_graph(
+    nodes_df = nodes,
+    edges_df = edges,
+    attr_theme = NULL
+  ) %>%
+    DiagrammeR::add_global_graph_attrs(
+      attr_type = "graph",
+      attr = c("layout", "rankdir"),
+      value = c("dot", "LR")
+    ) %>%
+    DiagrammeR::add_global_graph_attrs(
+      attr_type = "node",
+      attr = c("color", "fillcolor", "style", "shape", "fontname"),
+      value = c("DimGray", "beige", "filled", "rectangle", "Helvetica")
+    ) %>%
+    DiagrammeR::add_global_graph_attrs(
+      attr_type = "edge",
+      attr = c("color", "arrowsize", "arrowhead", "fontname"),
+      value = c("DimGray", "1.5", "vee", "Helvetica"))
 
   DiagrammeR::render_graph(graph, width = plot_width, height = plot_height)
 }
 
-globalVariables(
-  c(
-    ".N", "N", "From", "To", "Text", "Feature", "no.nodes.abs.pos", "ID", "Yes", "No", "Tree", "yes.nodes.abs.pos", "abs.node.position"
-  )
-)
+globalVariables(c(".N", "N", "From", "To", "Text", "Feature", "no.nodes.abs.pos",
+                  "ID", "Yes", "No", "Tree", "yes.nodes.abs.pos", "abs.node.position"))
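For completeness, a minimal call against the reworked plotting function (reusing the agaricus bst from the first sketch; features_keep lowered only to keep the rendered graph compact):

    xgb.plot.multi.trees(model = bst,
                         feature_names = colnames(agaricus.train$data),
                         features_keep = 3)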
".N", "N", "From", "To", "Text", "Feature", "no.nodes.abs.pos", "ID", "Yes", "No", "Tree", "yes.nodes.abs.pos", "abs.node.position" - ) -) +globalVariables(c(".N", "N", "From", "To", "Text", "Feature", "no.nodes.abs.pos", + "ID", "Yes", "No", "Tree", "yes.nodes.abs.pos", "abs.node.position")) diff --git a/R-package/R/xgb.plot.tree.R b/R-package/R/xgb.plot.tree.R index 98ef008df..41b72c8a0 100644 --- a/R-package/R/xgb.plot.tree.R +++ b/R-package/R/xgb.plot.tree.R @@ -2,37 +2,65 @@ #' #' Read a tree model text dump and plot the model. #' -#' @param feature_names names of each feature as a \code{character} vector. Can be extracted from a sparse matrix (see example). If model dump already contains feature names, this argument should be \code{NULL}. -#' @param model generated by the \code{xgb.train} function. Avoid the creation of a dump file. -#' @param n_first_tree limit the plot to the n first trees. If \code{NULL}, all trees of the model are plotted. Performance can be low for huge models. +#' @param feature_names names of each feature as a \code{character} vector. +#' @param model produced by the \code{xgb.train} function. +#' @param trees an integer vector of tree indices that should be visualized. +#' If set to \code{NULL}, all trees of the model are included. +#' IMPORTANT: the tree index in xgboost model is zero-based +#' (e.g., use \code{trees = 0:2} for the first 3 trees in a model). #' @param plot_width the width of the diagram in pixels. #' @param plot_height the height of the diagram in pixels. +#' @param render a logical flag for whether the graph should be rendered (see Value). +#' @param show_node_id a logical flag for whether to include node id's in the graph. #' @param ... currently not used. #' -#' @return A \code{DiagrammeR} of the model. -#' #' @details #' #' The content of each node is organised that way: #' #' \itemize{ -#' \item \code{feature} value; -#' \item \code{cover}: the sum of second order gradient of training data classified to the leaf, if it is square loss, this simply corresponds to the number of instances in that branch. Deeper in the tree a node is, lower this metric will be; -#' \item \code{gain}: metric the importance of the node in the model. +#' \item Feature name. +#' \item \code{Cover}: The sum of second order gradient of training data classified to the leaf. +#' If it is square loss, this simply corresponds to the number of instances seen by a split +#' or collected by a leaf during training. +#' The deeper in the tree a node is, the lower this metric will be. +#' \item \code{Gain} (for split nodes): the information gain metric of a split +#' (corresponds to the importance of the node in the model). +#' \item \code{Value} (for leafs): the margin value that the leaf may contribute to prediction. #' } +#' The tree root nodes also indicate the Tree index (0-based). #' -#' The function uses \href{http://www.graphviz.org/}{GraphViz} library for that purpose. +#' The "Yes" branches are marked by the "< split_value" label. +#' The branches that also used for missing values are marked as bold +#' (as in "carrying extra capacity"). +#' +#' This function uses \href{http://www.graphviz.org/}{GraphViz} as a backend of DiagrammeR. #' +#' @return +#' +#' When \code{render = TRUE}: +#' returns a rendered graph object which is an \code{htmlwidget} of class \code{grViz}. +#' Similar to ggplot objects, it needs to be printed to see it when not running from command line. 
+#'
+#' When \code{render = FALSE}:
+#' silently returns a graph object of DiagrammeR's class \code{dgr_graph}.
+#' This could be useful if one wants to modify some of the graph attributes
+#' before rendering the graph with \code{\link[DiagrammeR]{render_graph}}.
+#'
 #' @examples
 #' data(agaricus.train, package='xgboost')
 #'
-#' bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max_depth = 2,
+#' bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max_depth = 3,
 #'                eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic")
-#'
+#' # plot all the trees
 #' xgb.plot.tree(feature_names = colnames(agaricus.train$data), model = bst)
+#' # plot only the first tree and include the node ID:
+#' xgb.plot.tree(feature_names = colnames(agaricus.train$data), model = bst,
+#'               trees = 0, show_node_id = TRUE)
 #'
 #' @export
-xgb.plot.tree <- function(feature_names = NULL, model = NULL, n_first_tree = NULL, plot_width = NULL, plot_height = NULL, ...){
+xgb.plot.tree <- function(feature_names = NULL, model = NULL, trees = NULL, plot_width = NULL, plot_height = NULL,
+                          render = TRUE, show_node_id = FALSE, ...){
   check.deprecation(...)
   if (class(model) != "xgb.Booster") {
     stop("model: Has to be an object of class xgb.Booster model generated by the xgb.train function.")
@@ -42,34 +70,55 @@ xgb.plot.tree <- function(feature_names = NULL, model = NULL, n_first_tree = NUL
     stop("DiagrammeR package is required for xgb.plot.tree", call. = FALSE)
   }
 
-  allTrees <- xgb.model.dt.tree(feature_names = feature_names, model = model, n_first_tree = n_first_tree)
-
-  allTrees[, label:= paste0(Feature, "\nCover: ", Cover, "\nGain: ", Quality)]
-  allTrees[, shape:= "rectangle"][Feature == "Leaf", shape:= "oval"]
-  allTrees[, filledcolor:= "Beige"][Feature == "Leaf", filledcolor:= "Khaki"]
-
-  # rev is used to put the first tree on top.
-  nodes <- DiagrammeR::create_node_df(n = length(allTrees[,ID] %>% rev),
-                                      label = allTrees[,label] %>% rev,
-                                      style = "filled",
-                                      color = "DimGray",
-                                      fillcolor= allTrees[,filledcolor] %>% rev,
-                                      shape = allTrees[,shape] %>% rev,
-                                      data = allTrees[,Feature] %>% rev,
-                                      fontname = "Helvetica"
-                                      )
-
-  edges <- DiagrammeR::create_edge_df(from = match(allTrees[Feature != "Leaf", c(ID)] %>% rep(2), allTrees[,ID] %>% rev),
-                                      to = match(allTrees[Feature != "Leaf", c(Yes, No)], allTrees[,ID] %>% rev),
-                                      label = allTrees[Feature != "Leaf", paste("<",Split)] %>% c(rep("",nrow(allTrees[Feature != "Leaf"]))),
-                                      color = "DimGray",
-                                      arrowsize = "1.5",
-                                      arrowhead = "vee",
-                                      fontname = "Helvetica",
-                                      rel = "leading_to")
+  dt <- xgb.model.dt.tree(feature_names = feature_names, model = model, trees = trees)
 
-  graph <- DiagrammeR::create_graph(nodes_df = nodes,
-                                    edges_df = edges)
+  dt[, label:= paste0(Feature, "\nCover: ", Cover, ifelse(Feature == "Leaf", "\nValue: ", "\nGain: "), Quality)]
+  if (show_node_id)
+    dt[, label := paste0(ID, ": ", label)]
+  dt[Node == 0, label := paste0("Tree ", Tree, "\n", label)]
+  dt[, shape:= "rectangle"][Feature == "Leaf", shape:= "oval"]
+  dt[, filledcolor:= "Beige"][Feature == "Leaf", filledcolor:= "Khaki"]
+  # in order to draw the first tree on top:
+  dt <- dt[order(-Tree)]
+
+  nodes <- DiagrammeR::create_node_df(
+    n = nrow(dt),
+    ID = dt$ID,
+    label = dt$label,
+    fillcolor = dt$filledcolor,
+    shape = dt$shape,
+    data = dt$Feature)
+
+  edges <- DiagrammeR::create_edge_df(
+    from = match(dt[Feature != "Leaf", c(ID)] %>% rep(2), dt$ID),
+    to = match(dt[Feature != "Leaf", c(Yes, No)], dt$ID),
+    label = dt[Feature != "Leaf", paste("<", Split)] %>%
+      c(rep("", nrow(dt[Feature != "Leaf"]))),
+    style = dt[Feature != "Leaf", ifelse(Missing == Yes, "bold", "solid")] %>%
+      c(dt[Feature != "Leaf", ifelse(Missing == No, "bold", "solid")]),
+    rel = "leading_to")
+
+  graph <- DiagrammeR::create_graph(
+    nodes_df = nodes,
+    edges_df = edges,
+    attr_theme = NULL
+  ) %>%
+    DiagrammeR::add_global_graph_attrs(
+      attr_type = "graph",
+      attr = c("layout", "rankdir"),
+      value = c("dot", "LR")
+    ) %>%
+    DiagrammeR::add_global_graph_attrs(
+      attr_type = "node",
+      attr = c("color", "style", "fontname"),
+      value = c("DimGray", "filled", "Helvetica")
+    ) %>%
+    DiagrammeR::add_global_graph_attrs(
+      attr_type = "edge",
+      attr = c("color", "arrowsize", "arrowhead", "fontname"),
+      value = c("DimGray", "1.5", "vee", "Helvetica"))
+
+  if (!render) return(invisible(graph))
 
   DiagrammeR::render_graph(graph, width = plot_width, height = plot_height)
 }
diff --git a/R-package/man/xgb.model.dt.tree.Rd b/R-package/man/xgb.model.dt.tree.Rd
index 8c839be20..8176303c6 100644
--- a/R-package/man/xgb.model.dt.tree.Rd
+++ b/R-package/man/xgb.model.dt.tree.Rd
@@ -5,7 +5,7 @@
 \title{Parse a boosted tree model text dump}
 \usage{
 xgb.model.dt.tree(feature_names = NULL, model = NULL, text = NULL,
-  n_first_tree = NULL)
+  trees = NULL, ...)
 }
 \arguments{
 \item{feature_names}{character vector of feature names. If the model already
@@ -16,8 +16,13 @@ contains feature names, this argument should be \code{NULL} (default value)}
 \item{text}{\code{character} vector previously generated by the \code{xgb.dump}
 function (where parameter \code{with_stats = TRUE} should have been set).}
 
-\item{n_first_tree}{limit the parsing to the \code{n} first trees.
-If set to \code{NULL}, all trees of the model are parsed.}
+\item{trees}{an integer vector of tree indices that should be parsed.
+If set to \code{NULL}, all trees of the model are parsed.
+This can be useful, e.g., in multiclass classification to get only
+the trees of a certain class. IMPORTANT: the tree index in an xgboost model
+is zero-based (e.g., use \code{trees = 0:4} for the first 5 trees).}
+
+\item{...}{currently not used.}
 }
 \value{
 A \code{data.table} with detailed information about model trees' nodes.
@@ -25,9 +30,9 @@ A \code{data.table} with detailed information about model trees' nodes.
 The columns of the \code{data.table} are:
 
 \itemize{
-  \item \code{Tree}: ID of a tree in a model
-  \item \code{Node}: ID of a node in a tree
-  \item \code{ID}: unique identifier of a node in a model
+  \item \code{Tree}: ID of a tree in a model (integer)
+  \item \code{Node}: ID of a node in a tree (integer)
+  \item \code{ID}: identifier of a node in a model (character)
   \item \code{Feature}: for a branch node, it's a feature id or name (when available);
         for a leaf node, it simply labels it as \code{'Leaf'}
   \item \code{Split}: location of the split for a branch node (split condition is always "less than")
diff --git a/R-package/man/xgb.plot.multi.trees.Rd b/R-package/man/xgb.plot.multi.trees.Rd
index 1ab9adf3f..faef94555 100644
--- a/R-package/man/xgb.plot.multi.trees.Rd
+++ b/R-package/man/xgb.plot.multi.trees.Rd
@@ -8,9 +8,9 @@ xgb.plot.multi.trees(model, feature_names = NULL, features_keep = 5,
   plot_width = NULL, plot_height = NULL, ...)
 }
 \arguments{
-\item{model}{dump generated by the \code{xgb.train} function.}
+\item{model}{produced by the \code{xgb.train} function.}
 
-\item{feature_names}{names of each feature as a \code{character} vector. Can be extracted from a sparse matrix (see example). If model dump already contains feature names, this argument should be \code{NULL}.}
+\item{feature_names}{names of each feature as a \code{character} vector.}
 
 \item{features_keep}{number of features to keep in each position of the multi trees.}
 
@@ -27,21 +27,19 @@ Two graphs showing the distribution of the model deepness.
 Visualization of the ensemble of trees as a single collective unit.
 }
 \details{
-This function tries to capture the complexity of gradient boosted tree ensemble
-in a cohesive way.
+This function tries to capture the complexity of a gradient boosted tree model
+in a cohesive way by compressing an ensemble of trees into a single tree-graph representation.
+The goal is to improve the interpretability of a model generally seen as a black box.
 
-The goal is to improve the interpretability of the model generally seen as black box.
-The function is dedicated to boosting applied to decision trees only.
-
-The purpose is to move from an ensemble of trees to a single tree only.
+Note: this function is applicable to tree booster-based models only.
 
 It takes advantage of the fact that the shape of a binary tree is only defined by
-its deepness (therefore in a boosting model, all trees have the same shape).
+its depth (therefore, in a boosting model, all trees have a similar shape).
 
 Moreover, the trees tend to reuse the same features.
 
-The function will project each tree on one, and keep for each position the
-\code{features_keep} first features (based on Gain per feature measure).
+The function projects each tree onto one, and keeps for each position the
+first \code{features_keep} features (ranked by the Gain per feature measure).
 
 This function is inspired by this blog post:
 \url{https://wellecks.wordpress.com/2015/02/21/peering-into-the-black-box-visualizing-lambdamart/}
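Since Split is now parsed as numeric and Node/Tree as integers (see the man page above), the returned table can be filtered arithmetically without casts. A small sketch, again with the agaricus bst from the first note; leaves carry no split threshold, hence the !is.na(Split) guard:

    dt <- xgb.model.dt.tree(feature_names = colnames(agaricus.train$data), model = bst)
    # branch nodes whose split threshold exceeds 0.5:
    dt[!is.na(Split) & Split > 0.5, .(Tree, Node, ID, Feature, Split, Quality)]
    # summed leaf margins per tree:
    dt[Feature == "Leaf", .(leaf_margin = sum(Quality)), by = Tree]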
diff --git a/R-package/man/xgb.plot.tree.Rd b/R-package/man/xgb.plot.tree.Rd
index 3620699bd..c4b7a6db3 100644
--- a/R-package/man/xgb.plot.tree.Rd
+++ b/R-package/man/xgb.plot.tree.Rd
@@ -4,24 +4,39 @@
 \alias{xgb.plot.tree}
 \title{Plot a boosted tree model}
 \usage{
-xgb.plot.tree(feature_names = NULL, model = NULL, n_first_tree = NULL,
-  plot_width = NULL, plot_height = NULL, ...)
+xgb.plot.tree(feature_names = NULL, model = NULL, trees = NULL,
+  plot_width = NULL, plot_height = NULL, render = TRUE,
+  show_node_id = FALSE, ...)
 }
 \arguments{
-\item{feature_names}{names of each feature as a \code{character} vector. Can be extracted from a sparse matrix (see example). If model dump already contains feature names, this argument should be \code{NULL}.}
+\item{feature_names}{names of each feature as a \code{character} vector.}
 
-\item{model}{generated by the \code{xgb.train} function. Avoid the creation of a dump file.}
+\item{model}{produced by the \code{xgb.train} function.}
 
-\item{n_first_tree}{limit the plot to the n first trees. If \code{NULL}, all trees of the model are plotted. Performance can be low for huge models.}
+\item{trees}{an integer vector of tree indices that should be visualized.
+If set to \code{NULL}, all trees of the model are included.
+IMPORTANT: the tree index in an xgboost model is zero-based
+(e.g., use \code{trees = 0:2} for the first 3 trees in a model).}
 
 \item{plot_width}{the width of the diagram in pixels.}
 
 \item{plot_height}{the height of the diagram in pixels.}
 
+\item{render}{a logical flag for whether the graph should be rendered (see Value).}
+
+\item{show_node_id}{a logical flag for whether to include node IDs in the graph.}
+
 \item{...}{currently not used.}
 }
 \value{
-A \code{DiagrammeR} of the model.
+When \code{render = TRUE}:
+returns a rendered graph object which is an \code{htmlwidget} of class \code{grViz}.
+Similar to ggplot objects, it needs to be printed to be seen when not running from the command line.
+
+When \code{render = FALSE}:
+silently returns a graph object of DiagrammeR's class \code{dgr_graph}.
+This could be useful if one wants to modify some of the graph attributes
+before rendering the graph with \code{\link[DiagrammeR]{render_graph}}.
 }
 \description{
 Read a tree model text dump and plot the model.
@@ -30,20 +45,33 @@ Read a tree model text dump and plot the model.
 The content of each node is organised that way:
 
 \itemize{
-  \item \code{feature} value;
-  \item \code{cover}: the sum of second order gradient of training data classified to the leaf, if it is square loss, this simply corresponds to the number of instances in that branch. Deeper in the tree a node is, lower this metric will be;
-  \item \code{gain}: metric the importance of the node in the model.
+  \item Feature name.
+  \item \code{Cover}: The sum of second order gradient of training data classified to the leaf.
+        If it is square loss, this simply corresponds to the number of instances seen by a split
+        or collected by a leaf during training.
+        The deeper in the tree a node is, the lower this metric will be.
+  \item \code{Gain} (for split nodes): the information gain metric of a split
+        (corresponds to the importance of the node in the model).
+  \item \code{Value} (for leaves): the margin value that the leaf may contribute to prediction.
 }
+The tree root nodes also indicate the Tree index (0-based).
 
-The function uses \href{http://www.graphviz.org/}{GraphViz} library for that purpose.
+The "Yes" branches are marked by the "< split_value" label.
+The branches that are also used for missing values are marked as bold
+(as in "carrying extra capacity").
+
+This function uses \href{http://www.graphviz.org/}{GraphViz} as a backend of DiagrammeR.
 }
 \examples{
 data(agaricus.train, package='xgboost')
 
-bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max_depth = 2,
+bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max_depth = 3,
                eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic")
-
+# plot all the trees
 xgb.plot.tree(feature_names = colnames(agaricus.train$data), model = bst)
+# plot only the first tree and include the node ID:
+xgb.plot.tree(feature_names = colnames(agaricus.train$data), model = bst,
+              trees = 0, show_node_id = TRUE)
 }
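Finally, a usage sketch for the new render flag: grab the dgr_graph, adjust it, and render manually (bst as defined in the examples above; bgcolor is a standard GraphViz graph attribute, and the add_global_graph_attrs() call mirrors the ones already used in this patch):

    gr <- xgb.plot.tree(feature_names = colnames(agaricus.train$data),
                        model = bst, trees = 0, render = FALSE)
    class(gr)  # "dgr_graph"
    gr <- DiagrammeR::add_global_graph_attrs(gr, attr_type = "graph",
                                             attr = "bgcolor", value = "transparent")
    DiagrammeR::render_graph(gr, width = 800)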