Support categorical data for dask functional interface and DQM. (#7043)
* Support categorical data for dask functional interface and DQM. * Implement categorical data support for GPU GK-merge. * Add support for dask functional interface. * Add support for DQM. * Get newer cupy.
This commit is contained in:
@@ -1151,12 +1151,12 @@ struct SegmentedUniqueReduceOp {
|
||||
* \return Number of unique values in total.
|
||||
*/
|
||||
template <typename DerivedPolicy, typename KeyInIt, typename KeyOutIt, typename ValInIt,
|
||||
typename ValOutIt, typename Comp>
|
||||
typename ValOutIt, typename CompValue, typename CompKey>
|
||||
size_t
|
||||
SegmentedUnique(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
|
||||
KeyInIt key_segments_first, KeyInIt key_segments_last, ValInIt val_first,
|
||||
ValInIt val_last, KeyOutIt key_segments_out, ValOutIt val_out,
|
||||
Comp comp) {
|
||||
CompValue comp, CompKey comp_key=thrust::equal_to<size_t>{}) {
|
||||
using Key = thrust::pair<size_t, typename thrust::iterator_traits<ValInIt>::value_type>;
|
||||
auto unique_key_it = dh::MakeTransformIterator<Key>(
|
||||
thrust::make_counting_iterator(static_cast<size_t>(0)),
|
||||
@@ -1177,7 +1177,7 @@ SegmentedUnique(const thrust::detail::execution_policy_base<DerivedPolicy> &exec
|
||||
exec, unique_key_it, unique_key_it + n_inputs,
|
||||
val_first, reduce_it, val_out,
|
||||
[=] __device__(Key const &l, Key const &r) {
|
||||
if (l.first == r.first) {
|
||||
if (comp_key(l.first, r.first)) {
|
||||
// In the same segment.
|
||||
return comp(l.second, r.second);
|
||||
}
|
||||
@@ -1195,7 +1195,9 @@ template <typename... Inputs,
|
||||
* = nullptr>
|
||||
size_t SegmentedUnique(Inputs &&...inputs) {
|
||||
dh::XGBCachingDeviceAllocator<char> alloc;
|
||||
return SegmentedUnique(thrust::cuda::par(alloc), std::forward<Inputs&&>(inputs)...);
|
||||
return SegmentedUnique(thrust::cuda::par(alloc),
|
||||
std::forward<Inputs &&>(inputs)...,
|
||||
thrust::equal_to<size_t>{});
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -129,60 +129,52 @@ void SortByWeight(dh::device_vector<float>* weights,
|
||||
});
|
||||
}
|
||||
|
||||
struct IsCatOp {
|
||||
XGBOOST_DEVICE bool operator()(FeatureType ft) { return ft == FeatureType::kCategorical; }
|
||||
};
|
||||
|
||||
void RemoveDuplicatedCategories(
|
||||
int32_t device, MetaInfo const &info, Span<bst_row_t> d_cuts_ptr,
|
||||
dh::device_vector<Entry> *p_sorted_entries,
|
||||
dh::caching_device_vector<size_t>* p_column_sizes_scan) {
|
||||
dh::caching_device_vector<size_t> *p_column_sizes_scan) {
|
||||
auto d_feature_types = info.feature_types.ConstDeviceSpan();
|
||||
auto& column_sizes_scan = *p_column_sizes_scan;
|
||||
if (!info.feature_types.Empty() &&
|
||||
thrust::any_of(dh::tbegin(d_feature_types), dh::tend(d_feature_types),
|
||||
IsCatOp{})) {
|
||||
auto& sorted_entries = *p_sorted_entries;
|
||||
// Removing duplicated entries in categorical features.
|
||||
dh::caching_device_vector<size_t> new_column_scan(column_sizes_scan.size());
|
||||
dh::SegmentedUnique(
|
||||
column_sizes_scan.data().get(),
|
||||
column_sizes_scan.data().get() + column_sizes_scan.size(),
|
||||
sorted_entries.begin(), sorted_entries.end(),
|
||||
new_column_scan.data().get(), sorted_entries.begin(),
|
||||
[=] __device__(Entry const &l, Entry const &r) {
|
||||
if (l.index == r.index) {
|
||||
if (IsCat(d_feature_types, l.index)) {
|
||||
return l.fvalue == r.fvalue;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
});
|
||||
CHECK(!d_feature_types.empty());
|
||||
auto &column_sizes_scan = *p_column_sizes_scan;
|
||||
auto &sorted_entries = *p_sorted_entries;
|
||||
// Removing duplicated entries in categorical features.
|
||||
dh::caching_device_vector<size_t> new_column_scan(column_sizes_scan.size());
|
||||
dh::SegmentedUnique(column_sizes_scan.data().get(),
|
||||
column_sizes_scan.data().get() + column_sizes_scan.size(),
|
||||
sorted_entries.begin(), sorted_entries.end(),
|
||||
new_column_scan.data().get(), sorted_entries.begin(),
|
||||
[=] __device__(Entry const &l, Entry const &r) {
|
||||
if (l.index == r.index) {
|
||||
if (IsCat(d_feature_types, l.index)) {
|
||||
return l.fvalue == r.fvalue;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
});
|
||||
|
||||
// Renew the column scan and cut scan based on categorical data.
|
||||
auto d_old_column_sizes_scan = dh::ToSpan(column_sizes_scan);
|
||||
dh::caching_device_vector<SketchContainer::OffsetT> new_cuts_size(
|
||||
info.num_col_ + 1);
|
||||
auto d_new_cuts_size = dh::ToSpan(new_cuts_size);
|
||||
auto d_new_columns_ptr = dh::ToSpan(new_column_scan);
|
||||
CHECK_EQ(new_column_scan.size(), new_cuts_size.size());
|
||||
dh::LaunchN(device, new_column_scan.size(), [=] __device__(size_t idx) {
|
||||
d_old_column_sizes_scan[idx] = d_new_columns_ptr[idx];
|
||||
if (idx == d_new_columns_ptr.size() - 1) {
|
||||
return;
|
||||
}
|
||||
if (IsCat(d_feature_types, idx)) {
|
||||
// Cut size is the same as number of categories in input.
|
||||
d_new_cuts_size[idx] =
|
||||
d_new_columns_ptr[idx + 1] - d_new_columns_ptr[idx];
|
||||
} else {
|
||||
d_new_cuts_size[idx] = d_cuts_ptr[idx] - d_cuts_ptr[idx];
|
||||
}
|
||||
});
|
||||
// Turn size into ptr.
|
||||
thrust::exclusive_scan(thrust::device, new_cuts_size.cbegin(),
|
||||
new_cuts_size.cend(), d_cuts_ptr.data());
|
||||
}
|
||||
// Renew the column scan and cut scan based on categorical data.
|
||||
auto d_old_column_sizes_scan = dh::ToSpan(column_sizes_scan);
|
||||
dh::caching_device_vector<SketchContainer::OffsetT> new_cuts_size(
|
||||
info.num_col_ + 1);
|
||||
auto d_new_cuts_size = dh::ToSpan(new_cuts_size);
|
||||
auto d_new_columns_ptr = dh::ToSpan(new_column_scan);
|
||||
CHECK_EQ(new_column_scan.size(), new_cuts_size.size());
|
||||
dh::LaunchN(device, new_column_scan.size(), [=] __device__(size_t idx) {
|
||||
d_old_column_sizes_scan[idx] = d_new_columns_ptr[idx];
|
||||
if (idx == d_new_columns_ptr.size() - 1) {
|
||||
return;
|
||||
}
|
||||
if (IsCat(d_feature_types, idx)) {
|
||||
// Cut size is the same as number of categories in input.
|
||||
d_new_cuts_size[idx] =
|
||||
d_new_columns_ptr[idx + 1] - d_new_columns_ptr[idx];
|
||||
} else {
|
||||
d_new_cuts_size[idx] = d_cuts_ptr[idx] - d_cuts_ptr[idx];
|
||||
}
|
||||
});
|
||||
// Turn size into ptr.
|
||||
thrust::exclusive_scan(thrust::device, new_cuts_size.cbegin(),
|
||||
new_cuts_size.cend(), d_cuts_ptr.data());
|
||||
}
|
||||
} // namespace detail
|
||||
|
||||
@@ -215,8 +207,11 @@ void ProcessBatch(int device, MetaInfo const &info, const SparsePage &page,
|
||||
0, sorted_entries.size(),
|
||||
&cuts_ptr, &column_sizes_scan);
|
||||
auto d_cuts_ptr = cuts_ptr.DeviceSpan();
|
||||
detail::RemoveDuplicatedCategories(device, info, d_cuts_ptr, &sorted_entries,
|
||||
&column_sizes_scan);
|
||||
|
||||
if (sketch_container->HasCategorical()) {
|
||||
detail::RemoveDuplicatedCategories(device, info, d_cuts_ptr,
|
||||
&sorted_entries, &column_sizes_scan);
|
||||
}
|
||||
|
||||
auto const& h_cuts_ptr = cuts_ptr.ConstHostVector();
|
||||
CHECK_EQ(d_cuts_ptr.size(), column_sizes_scan.size());
|
||||
@@ -281,8 +276,11 @@ void ProcessWeightedBatch(int device, const SparsePage& page,
|
||||
0, sorted_entries.size(),
|
||||
&cuts_ptr, &column_sizes_scan);
|
||||
auto d_cuts_ptr = cuts_ptr.DeviceSpan();
|
||||
detail::RemoveDuplicatedCategories(device, info, d_cuts_ptr, &sorted_entries,
|
||||
&column_sizes_scan);
|
||||
if (sketch_container->HasCategorical()) {
|
||||
detail::RemoveDuplicatedCategories(device, info, d_cuts_ptr,
|
||||
&sorted_entries, &column_sizes_scan);
|
||||
}
|
||||
|
||||
auto const& h_cuts_ptr = cuts_ptr.ConstHostVector();
|
||||
|
||||
// Extract cuts
|
||||
|
||||
@@ -210,6 +210,7 @@ void MergeImpl(int32_t device, Span<SketchEntry const> const &d_x,
|
||||
Span<bst_row_t const> const &x_ptr,
|
||||
Span<SketchEntry const> const &d_y,
|
||||
Span<bst_row_t const> const &y_ptr,
|
||||
Span<FeatureType const> feature_types,
|
||||
Span<SketchEntry> out,
|
||||
Span<bst_row_t> out_ptr) {
|
||||
dh::safe_cuda(cudaSetDevice(device));
|
||||
@@ -408,31 +409,6 @@ size_t SketchContainer::ScanInput(Span<SketchEntry> entries, Span<OffsetT> d_col
|
||||
return n_uniques;
|
||||
}
|
||||
|
||||
size_t SketchContainer::Unique() {
|
||||
timer_.Start(__func__);
|
||||
dh::safe_cuda(cudaSetDevice(device_));
|
||||
this->columns_ptr_.SetDevice(device_);
|
||||
Span<OffsetT> d_column_scan = this->columns_ptr_.DeviceSpan();
|
||||
CHECK_EQ(d_column_scan.size(), num_columns_ + 1);
|
||||
Span<SketchEntry> entries = dh::ToSpan(this->Current());
|
||||
HostDeviceVector<OffsetT> scan_out(d_column_scan.size());
|
||||
scan_out.SetDevice(device_);
|
||||
auto d_scan_out = scan_out.DeviceSpan();
|
||||
|
||||
d_column_scan = this->columns_ptr_.DeviceSpan();
|
||||
size_t n_uniques = dh::SegmentedUnique(
|
||||
d_column_scan.data(), d_column_scan.data() + d_column_scan.size(),
|
||||
entries.data(), entries.data() + entries.size(), scan_out.DevicePointer(),
|
||||
entries.data(),
|
||||
detail::SketchUnique{});
|
||||
this->columns_ptr_.Copy(scan_out);
|
||||
CHECK(!this->columns_ptr_.HostCanRead());
|
||||
|
||||
this->Current().resize(n_uniques);
|
||||
timer_.Stop(__func__);
|
||||
return n_uniques;
|
||||
}
|
||||
|
||||
void SketchContainer::Prune(size_t to) {
|
||||
timer_.Start(__func__);
|
||||
dh::safe_cuda(cudaSetDevice(device_));
|
||||
@@ -490,13 +466,20 @@ void SketchContainer::Merge(Span<OffsetT const> d_that_columns_ptr,
|
||||
this->Other().resize(this->Current().size() + that.size());
|
||||
CHECK_EQ(d_that_columns_ptr.size(), this->columns_ptr_.Size());
|
||||
|
||||
MergeImpl(device_, this->Data(), this->ColumnsPtr(),
|
||||
that, d_that_columns_ptr,
|
||||
dh::ToSpan(this->Other()), columns_ptr_b_.DeviceSpan());
|
||||
auto feature_types = this->FeatureTypes().ConstDeviceSpan();
|
||||
MergeImpl(device_, this->Data(), this->ColumnsPtr(), that, d_that_columns_ptr,
|
||||
feature_types, dh::ToSpan(this->Other()),
|
||||
columns_ptr_b_.DeviceSpan());
|
||||
this->columns_ptr_.Copy(columns_ptr_b_);
|
||||
CHECK_EQ(this->columns_ptr_.Size(), num_columns_ + 1);
|
||||
this->Alternate();
|
||||
|
||||
if (this->HasCategorical()) {
|
||||
auto d_feature_types = this->FeatureTypes().ConstDeviceSpan();
|
||||
this->Unique([d_feature_types] __device__(size_t l_fidx, size_t r_fidx) {
|
||||
return l_fidx == r_fidx && IsCat(d_feature_types, l_fidx);
|
||||
});
|
||||
}
|
||||
timer_.Stop(__func__);
|
||||
}
|
||||
|
||||
|
||||
@@ -16,6 +16,19 @@ class HistogramCuts;
|
||||
using WQSketch = WQuantileSketch<bst_float, bst_float>;
|
||||
using SketchEntry = WQSketch::Entry;
|
||||
|
||||
namespace detail {
|
||||
struct IsCatOp {
|
||||
XGBOOST_DEVICE bool operator()(FeatureType ft) {
|
||||
return ft == FeatureType::kCategorical;
|
||||
}
|
||||
};
|
||||
struct SketchUnique {
|
||||
XGBOOST_DEVICE bool operator()(SketchEntry const& a, SketchEntry const& b) const {
|
||||
return a.value - b.value == 0;
|
||||
}
|
||||
};
|
||||
} // namespace detail
|
||||
|
||||
/*!
|
||||
* \brief A container that holds the device sketches. Sketching is performed per-column,
|
||||
* but fused into single operation for performance.
|
||||
@@ -43,6 +56,8 @@ class SketchContainer {
|
||||
HostDeviceVector<OffsetT> columns_ptr_;
|
||||
HostDeviceVector<OffsetT> columns_ptr_b_;
|
||||
|
||||
bool has_categorical_{false};
|
||||
|
||||
dh::device_vector<SketchEntry>& Current() {
|
||||
if (current_buffer_) {
|
||||
return entries_a_;
|
||||
@@ -102,14 +117,21 @@ class SketchContainer {
|
||||
this->feature_types_.SetDevice(device);
|
||||
this->feature_types_.ConstDeviceSpan();
|
||||
this->feature_types_.ConstHostSpan();
|
||||
|
||||
auto d_feature_types = feature_types_.ConstDeviceSpan();
|
||||
has_categorical_ =
|
||||
!d_feature_types.empty() &&
|
||||
thrust::any_of(dh::tbegin(d_feature_types), dh::tend(d_feature_types),
|
||||
detail::IsCatOp{});
|
||||
|
||||
timer_.Init(__func__);
|
||||
}
|
||||
/* \brief Return GPU ID for this container. */
|
||||
int32_t DeviceIdx() const { return device_; }
|
||||
/* \brief Whether the predictor matrix contains categorical features. */
|
||||
bool HasCategorical() const { return has_categorical_; }
|
||||
/* \brief Accumulate weights of duplicated entries in input. */
|
||||
size_t ScanInput(Span<SketchEntry> entries, Span<OffsetT> d_columns_ptr_in);
|
||||
/* \brief Removes all the duplicated elements in quantile structure. */
|
||||
size_t Unique();
|
||||
/* Fix rounding error and re-establish invariance. The error is mostly generated by the
|
||||
* addition inside `RMinNext` and subtraction in `RMaxPrev`. */
|
||||
void FixError();
|
||||
@@ -154,15 +176,35 @@ class SketchContainer {
|
||||
|
||||
SketchContainer(const SketchContainer&) = delete;
|
||||
SketchContainer& operator=(const SketchContainer&) = delete;
|
||||
};
|
||||
|
||||
namespace detail {
|
||||
struct SketchUnique {
|
||||
XGBOOST_DEVICE bool operator()(SketchEntry const& a, SketchEntry const& b) const {
|
||||
return a.value - b.value == 0;
|
||||
/* \brief Removes all the duplicated elements in quantile structure. */
|
||||
template <typename KeyComp = thrust::equal_to<size_t>>
|
||||
size_t Unique(KeyComp key_comp = thrust::equal_to<size_t>{}) {
|
||||
timer_.Start(__func__);
|
||||
dh::safe_cuda(cudaSetDevice(device_));
|
||||
this->columns_ptr_.SetDevice(device_);
|
||||
Span<OffsetT> d_column_scan = this->columns_ptr_.DeviceSpan();
|
||||
CHECK_EQ(d_column_scan.size(), num_columns_ + 1);
|
||||
Span<SketchEntry> entries = dh::ToSpan(this->Current());
|
||||
HostDeviceVector<OffsetT> scan_out(d_column_scan.size());
|
||||
scan_out.SetDevice(device_);
|
||||
auto d_scan_out = scan_out.DeviceSpan();
|
||||
dh::XGBCachingDeviceAllocator<char> alloc;
|
||||
|
||||
d_column_scan = this->columns_ptr_.DeviceSpan();
|
||||
size_t n_uniques = dh::SegmentedUnique(
|
||||
thrust::cuda::par(alloc), d_column_scan.data(),
|
||||
d_column_scan.data() + d_column_scan.size(), entries.data(),
|
||||
entries.data() + entries.size(), scan_out.DevicePointer(),
|
||||
entries.data(), detail::SketchUnique{}, key_comp);
|
||||
this->columns_ptr_.Copy(scan_out);
|
||||
CHECK(!this->columns_ptr_.HostCanRead());
|
||||
|
||||
this->Current().resize(n_uniques);
|
||||
timer_.Stop(__func__);
|
||||
return n_uniques;
|
||||
}
|
||||
};
|
||||
} // namespace detail
|
||||
} // namespace common
|
||||
} // namespace xgboost
|
||||
|
||||
|
||||
Reference in New Issue
Block a user