skip missing lookup if nothing is missing in CPU hist partition kernel. (#5644)
* [xgboost] skip missing lookup if nothing is missing
This commit is contained in:
committed by
GitHub
parent
9ad40901a8
commit
4e64e2ef8e
@@ -154,6 +154,7 @@ class ColumnMatrix {
|
||||
index_base_ = const_cast<uint32_t*>(gmat.cut.Ptrs().data());
|
||||
|
||||
const bool noMissingValues = NoMissingValues(gmat.row_ptr[nrow], nrow, nfeature);
|
||||
any_missing_ = !noMissingValues;
|
||||
|
||||
if (noMissingValues) {
|
||||
missing_flags_.resize(feature_offsets_[nfeature], false);
|
||||
@@ -311,11 +312,18 @@ class ColumnMatrix {
|
||||
// Accessor: hands back the bin-index type size held in bins_type_size_.
const BinTypeSize GetTypeSize() const { return this->bins_type_size_; }
|
||||
|
||||
// Stateless utility: returns true when the number of stored entries equals
// n_row * n_features, i.e. every (row, feature) cell is populated and nothing
// is missing.
//
// Declared static because it reads no member state, which also lets const
// members call it. Returns plain bool: a top-level const on a by-value return
// has no effect.
//
//   n_elements  total number of stored entries
//   n_row       number of rows
//   n_features  number of features
static bool NoMissingValues(const size_t n_elements,
                            const size_t n_row, const size_t n_features) {
  return n_elements == n_features * n_row;
}
|
||||
|
||||
// And this returns part of state
|
||||
const bool AnyMissing() const {
|
||||
return any_missing_;
|
||||
}
|
||||
|
||||
private:
|
||||
std::vector<uint8_t> index_;
|
||||
|
||||
@@ -329,6 +337,7 @@ class ColumnMatrix {
|
||||
uint32_t* index_base_;
|
||||
std::vector<bool> missing_flags_;
|
||||
BinTypeSize bins_type_size_;
|
||||
bool any_missing_;
|
||||
};
|
||||
|
||||
} // namespace common
|
||||
|
||||
@@ -826,7 +826,7 @@ void QuantileHistMaker::Builder::EvaluateSplits(const std::vector<ExpandEntry>&
|
||||
// on comparison of indexes values (idx_span) and split point (split_cond)
|
||||
// Handle dense columns
|
||||
// Analogous to std::stable_partition, but not in place (results go to separate buffers)
|
||||
template <bool default_left, typename BinIdxType>
|
||||
template <bool default_left, bool any_missing, typename BinIdxType>
|
||||
inline std::pair<size_t, size_t> PartitionDenseKernel(const common::DenseColumn<BinIdxType>& column,
|
||||
common::Span<const size_t> rid_span, const int32_t split_cond,
|
||||
common::Span<size_t> left_part, common::Span<size_t> right_part) {
|
||||
@@ -837,14 +837,24 @@ inline std::pair<size_t, size_t> PartitionDenseKernel(const common::DenseColumn<
|
||||
size_t nleft_elems = 0;
|
||||
size_t nright_elems = 0;
|
||||
|
||||
for (auto rid : rid_span) {
|
||||
if (column.IsMissing(rid)) {
|
||||
if (default_left) {
|
||||
p_left_part[nleft_elems++] = rid;
|
||||
if (any_missing) {
|
||||
for (auto rid : rid_span) {
|
||||
if (column.IsMissing(rid)) {
|
||||
if (default_left) {
|
||||
p_left_part[nleft_elems++] = rid;
|
||||
} else {
|
||||
p_right_part[nright_elems++] = rid;
|
||||
}
|
||||
} else {
|
||||
p_right_part[nright_elems++] = rid;
|
||||
if ((static_cast<int32_t>(idx[rid]) + offset) <= split_cond) {
|
||||
p_left_part[nleft_elems++] = rid;
|
||||
} else {
|
||||
p_right_part[nright_elems++] = rid;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
}
|
||||
} else {
|
||||
for (auto rid : rid_span) {
|
||||
if ((static_cast<int32_t>(idx[rid]) + offset) <= split_cond) {
|
||||
p_left_part[nleft_elems++] = rid;
|
||||
} else {
|
||||
@@ -919,6 +929,7 @@ void QuantileHistMaker::Builder::PartitionKernel(
|
||||
const size_t node_in_set, const size_t nid, common::Range1d range,
|
||||
const int32_t split_cond, const ColumnMatrix& column_matrix, const RegTree& tree) {
|
||||
const size_t* rid = row_set_collection_[nid].begin;
|
||||
|
||||
common::Span<const size_t> rid_span(rid + range.begin(), rid + range.end());
|
||||
common::Span<size_t> left = partition_builder_.GetLeftBuffer(node_in_set,
|
||||
range.begin(), range.end());
|
||||
@@ -934,9 +945,21 @@ void QuantileHistMaker::Builder::PartitionKernel(
|
||||
const common::DenseColumn<BinIdxType>& column =
|
||||
static_cast<const common::DenseColumn<BinIdxType>& >(*(column_ptr.get()));
|
||||
if (default_left) {
|
||||
child_nodes_sizes = PartitionDenseKernel<true>(column, rid_span, split_cond, left, right);
|
||||
if (column_matrix.AnyMissing()) {
|
||||
child_nodes_sizes = PartitionDenseKernel<true, true>(column, rid_span, split_cond,
|
||||
left, right);
|
||||
} else {
|
||||
child_nodes_sizes = PartitionDenseKernel<true, false>(column, rid_span, split_cond,
|
||||
left, right);
|
||||
}
|
||||
} else {
|
||||
child_nodes_sizes = PartitionDenseKernel<false>(column, rid_span, split_cond, left, right);
|
||||
if (column_matrix.AnyMissing()) {
|
||||
child_nodes_sizes = PartitionDenseKernel<false, true>(column, rid_span, split_cond,
|
||||
left, right);
|
||||
} else {
|
||||
child_nodes_sizes = PartitionDenseKernel<false, false>(column, rid_span, split_cond,
|
||||
left, right);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
const common::SparseColumn<BinIdxType>& column
|
||||
|
||||
Reference in New Issue
Block a user