diff --git a/doc/python/python_api.rst b/doc/python/python_api.rst
index 6ef42c067..ad6ec3659 100644
--- a/doc/python/python_api.rst
+++ b/doc/python/python_api.rst
@@ -82,6 +82,8 @@ Dask API
 
 .. autofunction:: xgboost.dask.DaskDMatrix
 
+.. autofunction:: xgboost.dask.train
+
 .. autofunction:: xgboost.dask.predict
 
 .. autofunction:: xgboost.dask.DaskXGBClassifier
diff --git a/doc/tutorials/dask.rst b/doc/tutorials/dask.rst
index d5079b403..94167d487 100644
--- a/doc/tutorials/dask.rst
+++ b/doc/tutorials/dask.rst
@@ -77,6 +77,27 @@ interface with ``DaskXGBClassifier`` and ``DaskXGBRegressor``.  See ``xgboost/de
 for more examples.
 
 
+*****************************************************************************
+Why is the initialization of ``DaskDMatrix`` so slow and throws weird errors
+*****************************************************************************
+
+The dask API in XGBoost requires construction of ``DaskDMatrix``.  With the ``Scikit-Learn``
+interface, ``DaskDMatrix`` is implicitly constructed for each input data during ``fit`` or
+``predict``.  You might have observed that its construction takes an incredible amount of
+time, and sometimes throws errors that don't seem to be relevant to ``DaskDMatrix``.  Here
+is a brief explanation of why.  By default most of dask's computation is `lazy
+<https://docs.dask.org/en/latest/user-interfaces.html#laziness-and-computing>`_, which
+means the computation is not carried out until you explicitly ask for the result, either
+by calling ``compute()`` or ``wait()``.  See the above link for details in dask, and `this
+wiki <https://en.wikipedia.org/wiki/Lazy_evaluation>`_ for the general concept of lazy
+evaluation.  The ``DaskDMatrix`` constructor forces all lazy computation to materialize,
+which means it's where all your earlier computation is actually carried out, including
+operations like ``dd.read_csv()``.  To isolate the computation in ``DaskDMatrix`` from
+other lazy computations, one can explicitly wait for the results of the input data before
+calling the constructor of ``DaskDMatrix``.  Also dask's `web interface
+<https://distributed.dask.org/en/latest/web.html>`_ can be used to monitor what operations
+are currently being performed.
+
 ***********
 Limitations
 ***********
diff --git a/python-package/xgboost/dask.py b/python-package/xgboost/dask.py
index e6e392d3f..b1e0fafdc 100644
--- a/python-package/xgboost/dask.py
+++ b/python-package/xgboost/dask.py
@@ -113,25 +113,28 @@ def _assert_client(client):
 
 class DaskDMatrix:
     # pylint: disable=missing-docstring, too-many-instance-attributes
-    '''DMatrix holding on references to Dask DataFrame or Dask Array.
+    '''DMatrix holding references to Dask DataFrame or Dask Array.  Constructing
+    a `DaskDMatrix` forces all lazy computation to be carried out.  Wait for
+    the input data explicitly if you want to isolate the computation spent on
+    constructing `DaskDMatrix` itself.
 
-      Parameters
-      ----------
-      client: dask.distributed.Client
+    Parameters
+    ----------
+    client: dask.distributed.Client
         Specify the dask client used for training.  Use default client
         returned from dask if it's set to None.
-      data : dask.array.Array/dask.dataframe.DataFrame
+    data : dask.array.Array/dask.dataframe.DataFrame
         data source of DMatrix.
-      label: dask.array.Array/dask.dataframe.DataFrame
+    label: dask.array.Array/dask.dataframe.DataFrame
         label used for trainin.
-      missing : float, optional
-          Value in the  input data (e.g. `numpy.ndarray`) which needs
-          to be present as a missing value. If None, defaults to np.nan.
-      weight : dask.array.Array/dask.dataframe.DataFrame
+    missing : float, optional
+        Value in the input data (e.g. `numpy.ndarray`) which needs
+        to be treated as a missing value. If None, defaults to np.nan.
+    weight : dask.array.Array/dask.dataframe.DataFrame
         Weight for each instance.
-      feature_names : list, optional
+    feature_names : list, optional
         Set names for features.
-      feature_types : list, optional
+    feature_types : list, optional
         Set types for features
 
     '''
@@ -349,23 +352,23 @@ def train(client, params, dtrain, *args, evals=(), **kwargs):
     Parameters
     ----------
     client: dask.distributed.Client
-      Specify the dask client used for training.  Use default client
-      returned from dask if it's set to None.
-
-    Other parameters are the same as `xgboost.train` except for `evals_result`,
-    which is returned as part of function return value instead of argument.
+        Specify the dask client used for training.  Use default client
+        returned from dask if it's set to None.
+    \\*\\*kwargs:
+        Other parameters are the same as `xgboost.train` except for `evals_result`,
+        which is returned as part of function return value instead of argument.
 
     Returns
     -------
     results: dict
-      A dictionary containing trained booster and evaluation history.
-     `history` field is the same as `eval_result` from `xgboost.train`.
+        A dictionary containing trained booster and evaluation history.
+        `history` field is the same as `evals_result` from `xgboost.train`.
 
-      .. code-block:: python
+        .. code-block:: python
 
-        {'booster': xgboost.Booster,
-         'history': {'train': {'logloss': ['0.48253', '0.35953']},
-                     'eval': {'logloss': ['0.480385', '0.357756']}}}
+            {'booster': xgboost.Booster,
+             'history': {'train': {'logloss': ['0.48253', '0.35953']},
+                         'eval': {'logloss': ['0.480385', '0.357756']}}}
 
     '''
     _assert_dask_support()
@@ -420,15 +423,15 @@ def train(client, params, dtrain, *args, evals=(), **kwargs):
 def predict(client, model, data, *args):
     '''Run prediction with a trained booster.
 
-      .. note::
+    .. note::
 
-          Only default prediction mode is supported right now.
+        Only default prediction mode is supported right now.
 
     Parameters
     ----------
     client: dask.distributed.Client
-      Specify the dask client used for training.  Use default client
-      returned from dask if it's set to None.
+        Specify the dask client used for prediction.  Use default client
+        returned from dask if it's set to None.
     model: A Booster or a dictionary returned by `xgboost.dask.train`.
         The trained model.
     data: DaskDMatrix
diff --git a/python-package/xgboost/plotting.py b/python-package/xgboost/plotting.py
index 5ac8d177d..772951583 100644
--- a/python-package/xgboost/plotting.py
+++ b/python-package/xgboost/plotting.py
@@ -136,26 +136,26 @@ def to_graphviz(booster, fmap='', num_trees=0, rankdir=None,
         Edge color when meets the node condition.
     no_color : str, default '#FF0000'
         Edge color when doesn't meet the node condition.
-    condition_node_params : dict (optional)
+    condition_node_params : dict, optional
         Condition node configuration for for graphviz.  Example:
 
         .. code-block:: python
 
-        {'shape': 'box',
-         'style': 'filled,rounded',
-         'fillcolor': '#78bceb'}
+            {'shape': 'box',
+             'style': 'filled,rounded',
+             'fillcolor': '#78bceb'}
 
-    leaf_node_params : dict (optional)
+    leaf_node_params : dict, optional
         Leaf node configuration for graphviz. Example:
 
         .. code-block:: python
 
-        {'shape': 'box',
-         'style': 'filled',
-         'fillcolor': '#e48038'}
+            {'shape': 'box',
+             'style': 'filled',
+             'fillcolor': '#e48038'}
 
-    kwargs : Other keywords passed to graphviz graph_attr, E.g.:
-        ``graph [ {key} = {value} ]``
+    \\*\\*kwargs: dict, optional
+        Other keywords passed to graphviz graph_attr, e.g. ``graph [ {key} = {value} ]``
 
     Returns
     -------