External memory support for hist (#7531)

* Generate column matrix from gHistIndex.
* Avoid synchronization with the sparse page once the cache is written.
* Cleanups: Remove member variables/functions, change the update routine to look like approx and gpu_hist.
* Remove pruner.
2022-03-22 00:13:20 +08:00
parent cd55823112
commit 4d81c741e9
25 changed files with 563 additions and 686 deletions
--- a/demo/guide-python/external_memory.py
+++ b/demo/guide-python/external_memory.py
@@ -7,6 +7,9 @@ instead of Quantile DMatrix.  The feature is not ready for production use yet.

    .. versionadded:: 1.5.0

+
+See :doc:`the tutorial </tutorials/external_memory>` for more details.
+
 """
 import os
 import xgboost
@@ -77,9 +80,14 @@ def main(tmpdir: str) -> xgboost.Booster:
    missing = np.NaN
    Xy = xgboost.DMatrix(it, missing=missing, enable_categorical=False)

-    # Other tree methods including ``hist`` and ``gpu_hist`` also work, but has some
-    # caveats.  This is still an experimental feature.
-    booster = xgboost.train({"tree_method": "approx"}, Xy, evals=[(Xy, "Train")])
+    # Other tree methods including ``hist`` and ``gpu_hist`` also work; see the tutorial
+    # in the documentation for details.
+    booster = xgboost.train(
+        {"tree_method": "approx", "max_depth": 2},
+        Xy,
+        evals=[(Xy, "Train")],
+        num_boost_round=10,
+    )
    return booster


--- a/demo/guide-python/feature_weights.py
+++ b/demo/guide-python/feature_weights.py
@@ -27,7 +27,7 @@ def main(args):
    dtrain.set_info(feature_weights=fw)

    bst = xgboost.train({'tree_method': 'hist',
-                         'colsample_bynode': 0.5},
+                         'colsample_bynode': 0.2},
                        dtrain, num_boost_round=10,
                        evals=[(dtrain, 'd')])
    feature_map = bst.get_fscore()