Initial support for external memory in gradient index. (#7183)

* Add hessian to batch param in preparation for the new approx implementation.
* Extract a push method for gradient index matrix.
* Use span instead of vector ref for hessian in sketching.
* Create a binary format for gradient index.
This commit is contained in:
Jiaming Yuan
2021-09-13 12:40:56 +08:00
committed by GitHub
parent a0dcf6f5c1
commit 3515931305
26 changed files with 546 additions and 171 deletions

View File

@@ -9,6 +9,7 @@
#include <algorithm>
#include <limits>
#include <utility>
#include "../../common/compressed_iterator.h"
#include "../../common/random.h"
@@ -185,10 +186,10 @@ GradientBasedSample UniformSampling::Sample(common::Span<GradientPair> gpair, DM
ExternalMemoryUniformSampling::ExternalMemoryUniformSampling(EllpackPageImpl const* page,
size_t n_rows,
const BatchParam& batch_param,
BatchParam batch_param,
float subsample)
: original_page_(page),
batch_param_(batch_param),
batch_param_(std::move(batch_param)),
subsample_(subsample),
sample_row_index_(n_rows) {}
@@ -259,10 +260,10 @@ GradientBasedSample GradientBasedSampling::Sample(common::Span<GradientPair> gpa
ExternalMemoryGradientBasedSampling::ExternalMemoryGradientBasedSampling(
EllpackPageImpl const* page,
size_t n_rows,
const BatchParam& batch_param,
BatchParam batch_param,
float subsample)
: original_page_(page),
batch_param_(batch_param),
batch_param_(std::move(batch_param)),
subsample_(subsample),
threshold_(n_rows + 1, 0.0f),
grad_sum_(n_rows, 0.0f),