Clarify the behavior of use_rmm. (#6808)
* Clarify the `use_rmm` flag in document and demo.
parent 3039dd194b
commit ca998df912
@@ -27,5 +27,21 @@ cmake .. -DUSE_CUDA=ON -DUSE_NCCL=ON -DPLUGIN_RMM=ON -DCMAKE_PREFIX_PATH=$CONDA_
 cmake .. -DUSE_CUDA=ON -DUSE_NCCL=ON -DPLUGIN_RMM=ON -DCMAKE_PREFIX_PATH=/path/to/rmm
 ```

+# Informing XGBoost about RMM pool
+
+When XGBoost is compiled with RMM, most of the large allocations go through the RMM
+allocators, but some small allocations in performance-critical areas use a different
+caching allocator so that we can have better control over memory allocation behavior.
+Users can override this behavior and force the use of RMM for all allocations by setting
+the global configuration ``use_rmm``:
+
+``` python
+with xgb.config_context(use_rmm=True):
+    clf = xgb.XGBClassifier(tree_method="gpu_hist")
+```
+
+Depending on the choice of memory pool size or type of allocator, this may have a negative
+performance impact.
+
 * [Using RMM with a single GPU](./rmm_singlegpu.py)
 * [Using RMM with a local Dask cluster consisting of multiple GPUs](./rmm_mgpu_with_dask.py)
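The demos listed above use the global setter `xgb.set_config` (now optional and commented out in the diffs below) instead of the context manager. A minimal sketch of that pattern, assuming XGBoost was built with the RMM plugin; the `initial_pool_size` value is illustrative only:

``` python
import rmm
import xgboost as xgb

# Hand device allocations to an RMM pool (2 GiB here purely for illustration).
rmm.reinitialize(pool_allocator=True, initial_pool_size=2 << 30)

# Route all of XGBoost's GPU allocations through RMM for the rest of the process.
xgb.set_config(use_rmm=True)
print(xgb.get_config()["use_rmm"])  # True
```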
@@ -4,11 +4,14 @@ import dask
 from dask.distributed import Client
 from dask_cuda import LocalCUDACluster


 def main(client):
-    # Inform XGBoost that RMM is used for GPU memory allocation
-    xgb.set_config(use_rmm=True)
+    # Optionally force XGBoost to use RMM for all GPU memory allocation, see ./README.md
+    # xgb.set_config(use_rmm=True)
+
     X, y = make_classification(n_samples=10000, n_informative=5, n_classes=3)
+    # In practice one should prefer loading the data with dask collections instead of
+    # using `from_array`.
     X = dask.array.from_array(X)
     y = dask.array.from_array(y)
     dtrain = xgb.dask.DaskDMatrix(client, X, label=y)
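The new comment about preferring dask collections refers to building the distributed array directly instead of wrapping an in-memory NumPy array with `from_array`. A hypothetical sketch of that approach (shapes and chunk sizes are illustrative, `client` is the one passed to `main`):

``` python
import dask.array as da
import xgboost as xgb

# Generate the data as a chunked dask array from the start, so no single
# process has to materialise the whole dataset.
X = da.random.random((100_000, 20), chunks=(10_000, 20))
y = da.random.randint(0, 3, size=100_000, chunks=10_000)
dtrain = xgb.dask.DaskDMatrix(client, X, label=y)
```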
@@ -22,6 +25,7 @@ def main(client):
     for i, e in enumerate(history['train']['merror']):
         print(f'[{i}] train-merror: {e}')

+
 if __name__ == '__main__':
     # To use RMM pool allocator with a GPU Dask cluster, just add rmm_pool_size option to
     # LocalCUDACluster constructor.
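A minimal sketch of the pattern this comment describes, reusing the demo's `LocalCUDACluster`, `Client` and `main`; the pool size is chosen purely for illustration:

``` python
if __name__ == '__main__':
    # dask-cuda forwards `rmm_pool_size` to RMM, so every GPU worker gets its
    # own pool allocator before XGBoost starts training.
    with LocalCUDACluster(rmm_pool_size='2GB') as cluster:
        with Client(cluster) as client:
            main(client)
```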
@@ -4,13 +4,18 @@ from sklearn.datasets import make_classification

 # Initialize RMM pool allocator
 rmm.reinitialize(pool_allocator=True)
-# Inform XGBoost that RMM is used for GPU memory allocation
-xgb.set_config(use_rmm=True)
+# Optionally force XGBoost to use RMM for all GPU memory allocation, see ./README.md
+# xgb.set_config(use_rmm=True)

 X, y = make_classification(n_samples=10000, n_informative=5, n_classes=3)
 dtrain = xgb.DMatrix(X, label=y)

-params = {'max_depth': 8, 'eta': 0.01, 'objective': 'multi:softprob', 'num_class': 3,
-          'tree_method': 'gpu_hist'}
+params = {
+    "max_depth": 8,
+    "eta": 0.01,
+    "objective": "multi:softprob",
+    "num_class": 3,
+    "tree_method": "gpu_hist",
+}
 # XGBoost will automatically use the RMM pool allocator
-bst = xgb.train(params, dtrain, num_boost_round=100, evals=[(dtrain, 'train')])
+bst = xgb.train(params, dtrain, num_boost_round=100, evals=[(dtrain, "train")])
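Since the global `xgb.set_config(use_rmm=True)` call is now commented out, the same opt-in can instead be scoped to the training call with the context manager shown in the README; a small sketch using the demo's `params` and `dtrain`:

``` python
# Limit the `use_rmm` override to this block instead of the whole process.
with xgb.config_context(use_rmm=True):
    bst = xgb.train(params, dtrain, num_boost_round=100, evals=[(dtrain, "train")])
```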