Handle duplicated values in sketching. (#6178)

* Accumulate weights in duplicated values.
* Fix device id in iterative dmatrix.
This commit is contained in:
Jiaming Yuan
2020-10-10 19:32:44 +08:00
committed by GitHub
parent ab5b35134f
commit 2241563f23
9 changed files with 250 additions and 54 deletions

View File

@@ -106,6 +106,8 @@ class SketchContainer {
}
/* \brief Return GPU ID for this container (the value held in device_). */
int32_t DeviceIdx() const {
  return device_;
}
/* \brief Accumulate weights of duplicated entries in input.
 *
 * \param entries           Sketch entries to be scanned.
 *                          NOTE(review): the span is non-const, so the entries are
 *                          presumably updated in place -- confirm against the definition.
 * \param d_columns_ptr_in  NOTE(review): presumably the per-column offsets into
 *                          `entries`; the `d_` prefix suggests device memory -- confirm.
 * \return NOTE(review): the meaning of the returned size is not visible from this
 *         declaration; presumably the number of entries left after scanning -- confirm.
 */
size_t ScanInput(Span<SketchEntry> entries, Span<OffsetT> d_columns_ptr_in);
/* \brief Removes all the duplicated elements in quantile structure.
 *
 * \return NOTE(review): the meaning of the returned size is not visible from this
 *         declaration; presumably the number of elements remaining after
 *         de-duplication -- confirm against the definition.
 */
size_t Unique();
/* Fix rounding error and re-establish invariance. The error is mostly generated by the
@@ -121,7 +123,7 @@ class SketchContainer {
* \param weights (optional) data weights.
*/
void Push(Span<Entry const> entries, Span<size_t> columns_ptr,
common::Span<OffsetT const> cuts_ptr, size_t total_cuts,
common::Span<OffsetT> cuts_ptr, size_t total_cuts,
Span<float> weights = {});
/* \brief Prune the quantile structure.
*