From ca998df9122eae085e9f91b7c41144a750c3e826 Mon Sep 17 00:00:00 2001
From: Jiaming Yuan
Date: Wed, 31 Mar 2021 15:43:11 +0800
Subject: [PATCH] Clarify the behavior of `use_rmm`. (#6808)

* Clarify the `use_rmm` flag in the document and demos.
---
 demo/rmm_plugin/README.md             | 16 ++++++++++++++++
 demo/rmm_plugin/rmm_mgpu_with_dask.py |  8 ++++++--
 demo/rmm_plugin/rmm_singlegpu.py      | 15 ++++++++++-----
 3 files changed, 32 insertions(+), 7 deletions(-)

diff --git a/demo/rmm_plugin/README.md b/demo/rmm_plugin/README.md
index ad73c61f3..bf6e7f12d 100644
--- a/demo/rmm_plugin/README.md
+++ b/demo/rmm_plugin/README.md
@@ -27,5 +27,21 @@ cmake .. -DUSE_CUDA=ON -DUSE_NCCL=ON -DPLUGIN_RMM=ON -DCMAKE_PREFIX_PATH=$CONDA_
 cmake .. -DUSE_CUDA=ON -DUSE_NCCL=ON -DPLUGIN_RMM=ON -DCMAKE_PREFIX_PATH=/path/to/rmm
 ```
 
+# Informing XGBoost about the RMM pool
+
+When XGBoost is compiled with RMM, most of the large allocations will go through RMM
+allocators, but some small allocations in performance-critical areas use a different
+caching allocator so that we can have better control over memory allocation behavior.
+Users can override this behavior and force the use of RMM for all allocations by setting
+the global configuration ``use_rmm``:
+
+``` python
+with xgb.config_context(use_rmm=True):
+    clf = xgb.XGBClassifier(tree_method="gpu_hist")
+```
+
+Depending on the choice of memory pool size or the type of allocator, this may have a
+negative performance impact.
+
 * [Using RMM with a single GPU](./rmm_singlegpu.py)
 * [Using RMM with a local Dask cluster consisting of multiple GPUs](./rmm_mgpu_with_dask.py)
diff --git a/demo/rmm_plugin/rmm_mgpu_with_dask.py b/demo/rmm_plugin/rmm_mgpu_with_dask.py
index a147e3072..23c1f794e 100644
--- a/demo/rmm_plugin/rmm_mgpu_with_dask.py
+++ b/demo/rmm_plugin/rmm_mgpu_with_dask.py
@@ -4,11 +4,14 @@ import dask
 from dask.distributed import Client
 from dask_cuda import LocalCUDACluster
 
+
 def main(client):
-    # Inform XGBoost that RMM is used for GPU memory allocation
-    xgb.set_config(use_rmm=True)
+    # Optionally force XGBoost to use RMM for all GPU memory allocation, see ./README.md
+    # xgb.set_config(use_rmm=True)
 
     X, y = make_classification(n_samples=10000, n_informative=5, n_classes=3)
+    # In practice one should prefer loading the data with dask collections instead of using
+    # `from_array`.
     X = dask.array.from_array(X)
     y = dask.array.from_array(y)
     dtrain = xgb.dask.DaskDMatrix(client, X, label=y)
@@ -22,6 +25,7 @@ def main(client):
     for i, e in enumerate(history['train']['merror']):
         print(f'[{i}] train-merror: {e}')
 
+
 if __name__ == '__main__':
     # To use RMM pool allocator with a GPU Dask cluster, just add rmm_pool_size option to
     # LocalCUDACluster constructor.
diff --git a/demo/rmm_plugin/rmm_singlegpu.py b/demo/rmm_plugin/rmm_singlegpu.py
index 02caa1cc7..6b7d1b58c 100644
--- a/demo/rmm_plugin/rmm_singlegpu.py
+++ b/demo/rmm_plugin/rmm_singlegpu.py
@@ -4,13 +4,18 @@ from sklearn.datasets import make_classification
 
 # Initialize RMM pool allocator
 rmm.reinitialize(pool_allocator=True)
-# Inform XGBoost that RMM is used for GPU memory allocation
-xgb.set_config(use_rmm=True)
+# Optionally force XGBoost to use RMM for all GPU memory allocation, see ./README.md
+# xgb.set_config(use_rmm=True)
 
 X, y = make_classification(n_samples=10000, n_informative=5, n_classes=3)
 dtrain = xgb.DMatrix(X, label=y)
 
-params = {'max_depth': 8, 'eta': 0.01, 'objective': 'multi:softprob', 'num_class': 3,
-          'tree_method': 'gpu_hist'}
+params = {
+    "max_depth": 8,
+    "eta": 0.01,
+    "objective": "multi:softprob",
+    "num_class": 3,
+    "tree_method": "gpu_hist",
+}
 # XGBoost will automatically use the RMM pool allocator
-bst = xgb.train(params, dtrain, num_boost_round=100, evals=[(dtrain, 'train')])
+bst = xgb.train(params, dtrain, num_boost_round=100, evals=[(dtrain, "train")])
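
The Dask demo's comment points at the `rmm_pool_size` option of `dask_cuda.LocalCUDACluster` without showing it in the changed hunks. Below is a minimal sketch of how that option can combine with the optional `use_rmm` override from the README section; it is not part of the patch, and the pool size, hyperparameters, and boosting rounds are illustrative assumptions.

``` python
# Sketch only, not from the patch: values such as rmm_pool_size="2GB" and
# num_boost_round=10 are placeholders; adjust for the GPUs at hand.
import dask.array as da
import xgboost as xgb
from dask.distributed import Client
from dask_cuda import LocalCUDACluster
from sklearn.datasets import make_classification


def main(client):
    # Optionally route all of XGBoost's GPU allocations through RMM,
    # as described in the README section.
    with xgb.config_context(use_rmm=True):
        X, y = make_classification(n_samples=10000, n_informative=5, n_classes=3)
        dtrain = xgb.dask.DaskDMatrix(client, da.from_array(X), label=da.from_array(y))
        output = xgb.dask.train(
            client,
            {"tree_method": "gpu_hist", "objective": "multi:softprob", "num_class": 3},
            dtrain,
            num_boost_round=10,
            evals=[(dtrain, "train")],
        )
        # output contains the trained booster and the per-round evaluation history.
        print(output["history"]["train"])


if __name__ == "__main__":
    # rmm_pool_size asks dask-cuda to create an RMM memory pool on each worker GPU,
    # which XGBoost then draws from when compiled with the RMM plugin.
    with LocalCUDACluster(rmm_pool_size="2GB") as cluster:
        with Client(cluster) as client:
            main(client)
```

Whether the global `use_rmm` setting made on the client is forwarded to the Dask workers may depend on the XGBoost version, so treat the `config_context` placement above as a sketch rather than a guaranteed behavior.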