External memory support for hist (#7531)

* Generate column matrix from gHistIndex.
* Avoid synchronization with the sparse page once the cache is written.
* Cleanups: Remove member variables/functions, change the update routine to look like approx and gpu_hist.
* Remove pruner.
This commit is contained in:
Jiaming Yuan
2022-03-22 00:13:20 +08:00
committed by GitHub
parent cd55823112
commit 4d81c741e9
25 changed files with 563 additions and 686 deletions

View File

@@ -7,6 +7,9 @@ instead of Quantile DMatrix. The feature is not ready for production use yet.
.. versionadded:: 1.5.0
See :doc:`the tutorial </tutorials/external_memory>` for more details.
"""
import os
import xgboost
@@ -77,9 +80,14 @@ def main(tmpdir: str) -> xgboost.Booster:
missing = np.NaN
Xy = xgboost.DMatrix(it, missing=missing, enable_categorical=False)
# Other tree methods including ``hist`` and ``gpu_hist`` also work, but have some
# caveats. This is still an experimental feature.
booster = xgboost.train({"tree_method": "approx"}, Xy, evals=[(Xy, "Train")])
# Other tree methods including ``hist`` and ``gpu_hist`` also work; see the tutorial
# in the documentation for details.
booster = xgboost.train(
{"tree_method": "approx", "max_depth": 2},
Xy,
evals=[(Xy, "Train")],
num_boost_round=10,
)
return booster

View File

@@ -27,7 +27,7 @@ def main(args):
dtrain.set_info(feature_weights=fw)
bst = xgboost.train({'tree_method': 'hist',
'colsample_bynode': 0.5},
'colsample_bynode': 0.2},
dtrain, num_boost_round=10,
evals=[(dtrain, 'd')])
feature_map = bst.get_fscore()