Fix bug when combining external memory with gpu_hist and subsampling. (#7481)

Instead of accessing data from the `original_page_`, access the data from the first page of the available batch.

fix #7476

Co-authored-by: jiamingy <jm.yuan@outlook.com>
This commit is contained in:
Ginko Balboa
2021-12-24 04:15:35 +01:00
committed by GitHub
parent 7f399eac8b
commit 29bfa94bb6
4 changed files with 61 additions and 33 deletions

View File

@@ -66,14 +66,12 @@ class UniformSampling : public SamplingStrategy {
/*! \brief Uniform sampling in external memory mode. */
class ExternalMemoryUniformSampling : public SamplingStrategy {
public:
ExternalMemoryUniformSampling(EllpackPageImpl const* page,
size_t n_rows,
ExternalMemoryUniformSampling(size_t n_rows,
BatchParam batch_param,
float subsample);
GradientBasedSample Sample(common::Span<GradientPair> gpair, DMatrix* dmat) override;
private:
EllpackPageImpl const* original_page_;
BatchParam batch_param_;
float subsample_;
std::unique_ptr<EllpackPageImpl> page_;
@@ -100,14 +98,12 @@ class GradientBasedSampling : public SamplingStrategy {
/*! \brief Gradient-based sampling in external memory mode. */
class ExternalMemoryGradientBasedSampling : public SamplingStrategy {
public:
ExternalMemoryGradientBasedSampling(EllpackPageImpl const* page,
size_t n_rows,
ExternalMemoryGradientBasedSampling(size_t n_rows,
BatchParam batch_param,
float subsample);
GradientBasedSample Sample(common::Span<GradientPair> gpair, DMatrix* dmat) override;
private:
EllpackPageImpl const* original_page_;
BatchParam batch_param_;
float subsample_;
dh::caching_device_vector<float> threshold_;