From 4be75d852c3aa1644256d491e82e388cc1a5bb32 Mon Sep 17 00:00:00 2001
From: Jiaming Yuan
Date: Mon, 27 Mar 2023 00:33:08 +0800
Subject: [PATCH] [backport] Fix scope of feature set pointers (#8850) (#8972)

---------

Co-authored-by: Rory Mitchell
---
Review notes (this section is commentary, not part of the commit message):

The C++ change keeps the feature-set pointers returned by
column_sampler.GetFeatureSet() in a local vector (feature_sets) so they do
not go out of scope before the kernel is called. The new Python test,
test_issue8824, trains with gpu_hist and colsample_bynode=0.5, the
configuration the test comment reports as crashing.

NOTE(review): the e-mail addresses appear to be missing from the From: and
Co-authored-by: headers in this copy; restore them before using git am.
NOTE(review): line breaks, leading whitespace and template arguments on the
C++ lines were reconstructed from a whitespace-collapsed copy; verify the
context lines against the target tree (or apply with whitespace tolerance).

 src/tree/updater_gpu_hist.cu          |  4 ++++
 tests/python-gpu/test_gpu_updaters.py | 31 +++++++++++++++++++++++++++
 2 files changed, 35 insertions(+)

diff --git a/src/tree/updater_gpu_hist.cu b/src/tree/updater_gpu_hist.cu
index c17252902..8625395c2 100644
--- a/src/tree/updater_gpu_hist.cu
+++ b/src/tree/updater_gpu_hist.cu
@@ -306,6 +306,8 @@ struct GPUHistMakerDevice {
         matrix.is_dense
     };
     dh::TemporaryArray<GPUExpandEntry> entries(2 * candidates.size());
+    // Store the feature set ptrs so they dont go out of scope before the kernel is called
+    std::vector<std::shared_ptr<HostDeviceVector<bst_feature_t>>> feature_sets;
     for (size_t i = 0; i < candidates.size(); i++) {
       auto candidate = candidates.at(i);
       int left_nidx = tree[candidate.nid].LeftChild();
@@ -314,10 +316,12 @@ struct GPUHistMakerDevice {
       nidx[i * 2 + 1] = right_nidx;
       auto left_sampled_features = column_sampler.GetFeatureSet(tree.GetDepth(left_nidx));
       left_sampled_features->SetDevice(ctx_->gpu_id);
+      feature_sets.emplace_back(left_sampled_features);
       common::Span<bst_feature_t> left_feature_set =
           interaction_constraints.Query(left_sampled_features->DeviceSpan(), left_nidx);
       auto right_sampled_features = column_sampler.GetFeatureSet(tree.GetDepth(right_nidx));
       right_sampled_features->SetDevice(ctx_->gpu_id);
+      feature_sets.emplace_back(right_sampled_features);
       common::Span<bst_feature_t> right_feature_set =
           interaction_constraints.Query(right_sampled_features->DeviceSpan(),
                                         right_nidx);
diff --git a/tests/python-gpu/test_gpu_updaters.py b/tests/python-gpu/test_gpu_updaters.py
index e86152327..10610d1a8 100644
--- a/tests/python-gpu/test_gpu_updaters.py
+++ b/tests/python-gpu/test_gpu_updaters.py
@@ -215,3 +215,34 @@ class TestGPUUpdaters:
     @pytest.mark.parametrize("weighted", [True, False])
     def test_adaptive(self, weighted) -> None:
         self.cputest.run_adaptive("gpu_hist", weighted)
+
+    @pytest.mark.skipif(**tm.no_pandas())
+    def test_issue8824(self):
+        # column sampling by node crashes because shared pointers go out of scope
+        import pandas as pd
+
+        data = pd.DataFrame(np.random.rand(1024, 8))
+        data.columns = "x" + data.columns.astype(str)
+        features = data.columns
+        data["y"] = data.sum(axis=1) < 4
+        dtrain = xgb.DMatrix(data[features], label=data["y"])
+        model = xgb.train(
+            dtrain=dtrain,
+            params={
+                "max_depth": 5,
+                "learning_rate": 0.05,
+                "objective": "binary:logistic",
+                "tree_method": "gpu_hist",
+                "colsample_bytree": 0.5,
+                "colsample_bylevel": 0.5,
+                "colsample_bynode": 0.5,  # Causes issues
+                "reg_alpha": 0.05,
+                "reg_lambda": 0.005,
+                "seed": 66,
+                "subsample": 0.5,
+                "gamma": 0.2,
+                "predictor": "auto",
+                "eval_metric": "auc",
+            },
+            num_boost_round=150,
+        )