Fix and cleanup for column matrix. (#7901)

* Fix missed type dispatching for dense columns with missing values.
* Code cleanup to reduce special cases.
* Reduce memory usage.
This commit is contained in:
Jiaming Yuan
2022-05-16 21:11:50 +08:00
committed by GitHub
parent 1496789561
commit 4fcfd9c96e
10 changed files with 124 additions and 136 deletions

View File

@@ -113,7 +113,7 @@ class HistogramCuts {
auto end = ptrs[column_id + 1];
auto beg = ptrs[column_id];
auto it = std::upper_bound(values.cbegin() + beg, values.cbegin() + end, value);
bst_bin_t idx = it - values.cbegin();
auto idx = it - values.cbegin();
idx -= !!(idx == end);
return idx;
}
@@ -189,12 +189,30 @@ inline HistogramCuts SketchOnDMatrix(DMatrix* m, int32_t max_bins, int32_t n_thr
return out;
}
enum BinTypeSize : uint32_t {
kUint8BinsTypeSize = 1,
enum BinTypeSize : uint8_t {
kUint8BinsTypeSize = 1,
kUint16BinsTypeSize = 2,
kUint32BinsTypeSize = 4
};
/**
 * \brief Dispatch for bin type, fn is a function that accepts a scalar of the bin type.
 *
 * The callable is invoked exactly once with a value-initialized scalar whose type is
 * selected by `type`: uint8_t for kUint8BinsTypeSize, uint16_t for kUint16BinsTypeSize
 * and uint32_t for kUint32BinsTypeSize. Only the type of the argument carries
 * information; its value is always zero.
 *
 * \param type Size tag selecting the bin type to dispatch on.
 * \param fn   Callable accepting uint8_t, uint16_t and uint32_t. Because the return type
 *             is deduced, every overload/instantiation must return the same type.
 *
 * \return Whatever fn returns for the selected bin type.
 */
template <typename Fn>
auto DispatchBinType(BinTypeSize type, Fn&& fn) {
  switch (type) {
    case kUint8BinsTypeSize:
      return fn(uint8_t{});
    case kUint16BinsTypeSize:
      return fn(uint16_t{});
    case kUint32BinsTypeSize:
      return fn(uint32_t{});
  }
  // An enum with a fixed underlying type can hold values other than its enumerators
  // (e.g. after a cast), so control can reach this point. Flowing off the end of a
  // value-returning function is undefined behaviour; fall back to the widest bin type
  // instead. This is deliberately placed after the switch rather than in a `default:`
  // label so the compiler can still warn when a new enumerator is left unhandled.
  return fn(uint32_t{});
}
/**
* \brief Optionally compressed gradient index. The compression works only with dense
* data.