Support categorical data for dask functional interface and DQM. (#7043)

* Support categorical data for dask functional interface and DQM.

* Implement categorical data support for GPU GK-merge.
* Add support for dask functional interface.
* Add support for DQM.

* Get newer cupy.
This commit is contained in:
Jiaming Yuan
2021-06-18 13:06:52 +08:00
committed by GitHub
parent 7dd29ffd47
commit 86715e4cd4
16 changed files with 364 additions and 167 deletions

View File

@@ -1151,12 +1151,12 @@ struct SegmentedUniqueReduceOp {
* \return Number of unique values in total.
*/
template <typename DerivedPolicy, typename KeyInIt, typename KeyOutIt, typename ValInIt,
typename ValOutIt, typename Comp>
typename ValOutIt, typename CompValue, typename CompKey>
size_t
SegmentedUnique(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
KeyInIt key_segments_first, KeyInIt key_segments_last, ValInIt val_first,
ValInIt val_last, KeyOutIt key_segments_out, ValOutIt val_out,
Comp comp) {
CompValue comp, CompKey comp_key=thrust::equal_to<size_t>{}) {
using Key = thrust::pair<size_t, typename thrust::iterator_traits<ValInIt>::value_type>;
auto unique_key_it = dh::MakeTransformIterator<Key>(
thrust::make_counting_iterator(static_cast<size_t>(0)),
@@ -1177,7 +1177,7 @@ SegmentedUnique(const thrust::detail::execution_policy_base<DerivedPolicy> &exec
exec, unique_key_it, unique_key_it + n_inputs,
val_first, reduce_it, val_out,
[=] __device__(Key const &l, Key const &r) {
if (l.first == r.first) {
if (comp_key(l.first, r.first)) {
// In the same segment.
return comp(l.second, r.second);
}
@@ -1195,7 +1195,9 @@ template <typename... Inputs,
* = nullptr>
size_t SegmentedUnique(Inputs &&...inputs) {
dh::XGBCachingDeviceAllocator<char> alloc;
return SegmentedUnique(thrust::cuda::par(alloc), std::forward<Inputs&&>(inputs)...);
return SegmentedUnique(thrust::cuda::par(alloc),
std::forward<Inputs &&>(inputs)...,
thrust::equal_to<size_t>{});
}
/**

View File

@@ -129,60 +129,52 @@ void SortByWeight(dh::device_vector<float>* weights,
});
}
struct IsCatOp {
XGBOOST_DEVICE bool operator()(FeatureType ft) { return ft == FeatureType::kCategorical; }
};
void RemoveDuplicatedCategories(
int32_t device, MetaInfo const &info, Span<bst_row_t> d_cuts_ptr,
dh::device_vector<Entry> *p_sorted_entries,
dh::caching_device_vector<size_t>* p_column_sizes_scan) {
dh::caching_device_vector<size_t> *p_column_sizes_scan) {
auto d_feature_types = info.feature_types.ConstDeviceSpan();
auto& column_sizes_scan = *p_column_sizes_scan;
if (!info.feature_types.Empty() &&
thrust::any_of(dh::tbegin(d_feature_types), dh::tend(d_feature_types),
IsCatOp{})) {
auto& sorted_entries = *p_sorted_entries;
// Removing duplicated entries in categorical features.
dh::caching_device_vector<size_t> new_column_scan(column_sizes_scan.size());
dh::SegmentedUnique(
column_sizes_scan.data().get(),
column_sizes_scan.data().get() + column_sizes_scan.size(),
sorted_entries.begin(), sorted_entries.end(),
new_column_scan.data().get(), sorted_entries.begin(),
[=] __device__(Entry const &l, Entry const &r) {
if (l.index == r.index) {
if (IsCat(d_feature_types, l.index)) {
return l.fvalue == r.fvalue;
}
}
return false;
});
CHECK(!d_feature_types.empty());
auto &column_sizes_scan = *p_column_sizes_scan;
auto &sorted_entries = *p_sorted_entries;
// Removing duplicated entries in categorical features.
dh::caching_device_vector<size_t> new_column_scan(column_sizes_scan.size());
dh::SegmentedUnique(column_sizes_scan.data().get(),
column_sizes_scan.data().get() + column_sizes_scan.size(),
sorted_entries.begin(), sorted_entries.end(),
new_column_scan.data().get(), sorted_entries.begin(),
[=] __device__(Entry const &l, Entry const &r) {
if (l.index == r.index) {
if (IsCat(d_feature_types, l.index)) {
return l.fvalue == r.fvalue;
}
}
return false;
});
// Renew the column scan and cut scan based on categorical data.
auto d_old_column_sizes_scan = dh::ToSpan(column_sizes_scan);
dh::caching_device_vector<SketchContainer::OffsetT> new_cuts_size(
info.num_col_ + 1);
auto d_new_cuts_size = dh::ToSpan(new_cuts_size);
auto d_new_columns_ptr = dh::ToSpan(new_column_scan);
CHECK_EQ(new_column_scan.size(), new_cuts_size.size());
dh::LaunchN(device, new_column_scan.size(), [=] __device__(size_t idx) {
d_old_column_sizes_scan[idx] = d_new_columns_ptr[idx];
if (idx == d_new_columns_ptr.size() - 1) {
return;
}
if (IsCat(d_feature_types, idx)) {
// Cut size is the same as number of categories in input.
d_new_cuts_size[idx] =
d_new_columns_ptr[idx + 1] - d_new_columns_ptr[idx];
} else {
d_new_cuts_size[idx] = d_cuts_ptr[idx] - d_cuts_ptr[idx];
}
});
// Turn size into ptr.
thrust::exclusive_scan(thrust::device, new_cuts_size.cbegin(),
new_cuts_size.cend(), d_cuts_ptr.data());
}
// Renew the column scan and cut scan based on categorical data.
auto d_old_column_sizes_scan = dh::ToSpan(column_sizes_scan);
dh::caching_device_vector<SketchContainer::OffsetT> new_cuts_size(
info.num_col_ + 1);
auto d_new_cuts_size = dh::ToSpan(new_cuts_size);
auto d_new_columns_ptr = dh::ToSpan(new_column_scan);
CHECK_EQ(new_column_scan.size(), new_cuts_size.size());
dh::LaunchN(device, new_column_scan.size(), [=] __device__(size_t idx) {
d_old_column_sizes_scan[idx] = d_new_columns_ptr[idx];
if (idx == d_new_columns_ptr.size() - 1) {
return;
}
if (IsCat(d_feature_types, idx)) {
// Cut size is the same as number of categories in input.
d_new_cuts_size[idx] =
d_new_columns_ptr[idx + 1] - d_new_columns_ptr[idx];
} else {
d_new_cuts_size[idx] = d_cuts_ptr[idx] - d_cuts_ptr[idx];
}
});
// Turn size into ptr.
thrust::exclusive_scan(thrust::device, new_cuts_size.cbegin(),
new_cuts_size.cend(), d_cuts_ptr.data());
}
} // namespace detail
@@ -215,8 +207,11 @@ void ProcessBatch(int device, MetaInfo const &info, const SparsePage &page,
0, sorted_entries.size(),
&cuts_ptr, &column_sizes_scan);
auto d_cuts_ptr = cuts_ptr.DeviceSpan();
detail::RemoveDuplicatedCategories(device, info, d_cuts_ptr, &sorted_entries,
&column_sizes_scan);
if (sketch_container->HasCategorical()) {
detail::RemoveDuplicatedCategories(device, info, d_cuts_ptr,
&sorted_entries, &column_sizes_scan);
}
auto const& h_cuts_ptr = cuts_ptr.ConstHostVector();
CHECK_EQ(d_cuts_ptr.size(), column_sizes_scan.size());
@@ -281,8 +276,11 @@ void ProcessWeightedBatch(int device, const SparsePage& page,
0, sorted_entries.size(),
&cuts_ptr, &column_sizes_scan);
auto d_cuts_ptr = cuts_ptr.DeviceSpan();
detail::RemoveDuplicatedCategories(device, info, d_cuts_ptr, &sorted_entries,
&column_sizes_scan);
if (sketch_container->HasCategorical()) {
detail::RemoveDuplicatedCategories(device, info, d_cuts_ptr,
&sorted_entries, &column_sizes_scan);
}
auto const& h_cuts_ptr = cuts_ptr.ConstHostVector();
// Extract cuts

View File

@@ -210,6 +210,7 @@ void MergeImpl(int32_t device, Span<SketchEntry const> const &d_x,
Span<bst_row_t const> const &x_ptr,
Span<SketchEntry const> const &d_y,
Span<bst_row_t const> const &y_ptr,
Span<FeatureType const> feature_types,
Span<SketchEntry> out,
Span<bst_row_t> out_ptr) {
dh::safe_cuda(cudaSetDevice(device));
@@ -408,31 +409,6 @@ size_t SketchContainer::ScanInput(Span<SketchEntry> entries, Span<OffsetT> d_col
return n_uniques;
}
size_t SketchContainer::Unique() {
timer_.Start(__func__);
dh::safe_cuda(cudaSetDevice(device_));
this->columns_ptr_.SetDevice(device_);
Span<OffsetT> d_column_scan = this->columns_ptr_.DeviceSpan();
CHECK_EQ(d_column_scan.size(), num_columns_ + 1);
Span<SketchEntry> entries = dh::ToSpan(this->Current());
HostDeviceVector<OffsetT> scan_out(d_column_scan.size());
scan_out.SetDevice(device_);
auto d_scan_out = scan_out.DeviceSpan();
d_column_scan = this->columns_ptr_.DeviceSpan();
size_t n_uniques = dh::SegmentedUnique(
d_column_scan.data(), d_column_scan.data() + d_column_scan.size(),
entries.data(), entries.data() + entries.size(), scan_out.DevicePointer(),
entries.data(),
detail::SketchUnique{});
this->columns_ptr_.Copy(scan_out);
CHECK(!this->columns_ptr_.HostCanRead());
this->Current().resize(n_uniques);
timer_.Stop(__func__);
return n_uniques;
}
void SketchContainer::Prune(size_t to) {
timer_.Start(__func__);
dh::safe_cuda(cudaSetDevice(device_));
@@ -490,13 +466,20 @@ void SketchContainer::Merge(Span<OffsetT const> d_that_columns_ptr,
this->Other().resize(this->Current().size() + that.size());
CHECK_EQ(d_that_columns_ptr.size(), this->columns_ptr_.Size());
MergeImpl(device_, this->Data(), this->ColumnsPtr(),
that, d_that_columns_ptr,
dh::ToSpan(this->Other()), columns_ptr_b_.DeviceSpan());
auto feature_types = this->FeatureTypes().ConstDeviceSpan();
MergeImpl(device_, this->Data(), this->ColumnsPtr(), that, d_that_columns_ptr,
feature_types, dh::ToSpan(this->Other()),
columns_ptr_b_.DeviceSpan());
this->columns_ptr_.Copy(columns_ptr_b_);
CHECK_EQ(this->columns_ptr_.Size(), num_columns_ + 1);
this->Alternate();
if (this->HasCategorical()) {
auto d_feature_types = this->FeatureTypes().ConstDeviceSpan();
this->Unique([d_feature_types] __device__(size_t l_fidx, size_t r_fidx) {
return l_fidx == r_fidx && IsCat(d_feature_types, l_fidx);
});
}
timer_.Stop(__func__);
}

View File

@@ -16,6 +16,19 @@ class HistogramCuts;
using WQSketch = WQuantileSketch<bst_float, bst_float>;
using SketchEntry = WQSketch::Entry;
namespace detail {
struct IsCatOp {
XGBOOST_DEVICE bool operator()(FeatureType ft) {
return ft == FeatureType::kCategorical;
}
};
struct SketchUnique {
XGBOOST_DEVICE bool operator()(SketchEntry const& a, SketchEntry const& b) const {
return a.value - b.value == 0;
}
};
} // namespace detail
/*!
* \brief A container that holds the device sketches. Sketching is performed per-column,
* but fused into single operation for performance.
@@ -43,6 +56,8 @@ class SketchContainer {
HostDeviceVector<OffsetT> columns_ptr_;
HostDeviceVector<OffsetT> columns_ptr_b_;
bool has_categorical_{false};
dh::device_vector<SketchEntry>& Current() {
if (current_buffer_) {
return entries_a_;
@@ -102,14 +117,21 @@ class SketchContainer {
this->feature_types_.SetDevice(device);
this->feature_types_.ConstDeviceSpan();
this->feature_types_.ConstHostSpan();
auto d_feature_types = feature_types_.ConstDeviceSpan();
has_categorical_ =
!d_feature_types.empty() &&
thrust::any_of(dh::tbegin(d_feature_types), dh::tend(d_feature_types),
detail::IsCatOp{});
timer_.Init(__func__);
}
/* \brief Return GPU ID for this container. */
int32_t DeviceIdx() const { return device_; }
/* \brief Whether the predictor matrix contains categorical features. */
bool HasCategorical() const { return has_categorical_; }
/* \brief Accumulate weights of duplicated entries in input. */
size_t ScanInput(Span<SketchEntry> entries, Span<OffsetT> d_columns_ptr_in);
/* \brief Removes all the duplicated elements in quantile structure. */
size_t Unique();
/* Fix rounding error and re-establish invariance. The error is mostly generated by the
* addition inside `RMinNext` and subtraction in `RMaxPrev`. */
void FixError();
@@ -154,15 +176,35 @@ class SketchContainer {
SketchContainer(const SketchContainer&) = delete;
SketchContainer& operator=(const SketchContainer&) = delete;
};
namespace detail {
struct SketchUnique {
XGBOOST_DEVICE bool operator()(SketchEntry const& a, SketchEntry const& b) const {
return a.value - b.value == 0;
/* \brief Removes all the duplicated elements in quantile structure. */
template <typename KeyComp = thrust::equal_to<size_t>>
size_t Unique(KeyComp key_comp = thrust::equal_to<size_t>{}) {
timer_.Start(__func__);
dh::safe_cuda(cudaSetDevice(device_));
this->columns_ptr_.SetDevice(device_);
Span<OffsetT> d_column_scan = this->columns_ptr_.DeviceSpan();
CHECK_EQ(d_column_scan.size(), num_columns_ + 1);
Span<SketchEntry> entries = dh::ToSpan(this->Current());
HostDeviceVector<OffsetT> scan_out(d_column_scan.size());
scan_out.SetDevice(device_);
auto d_scan_out = scan_out.DeviceSpan();
dh::XGBCachingDeviceAllocator<char> alloc;
d_column_scan = this->columns_ptr_.DeviceSpan();
size_t n_uniques = dh::SegmentedUnique(
thrust::cuda::par(alloc), d_column_scan.data(),
d_column_scan.data() + d_column_scan.size(), entries.data(),
entries.data() + entries.size(), scan_out.DevicePointer(),
entries.data(), detail::SketchUnique{}, key_comp);
this->columns_ptr_.Copy(scan_out);
CHECK(!this->columns_ptr_.HostCanRead());
this->Current().resize(n_uniques);
timer_.Stop(__func__);
return n_uniques;
}
};
} // namespace detail
} // namespace common
} // namespace xgboost

View File

@@ -134,17 +134,20 @@ struct WriteCompressedEllpackFunctor {
const common::CompressedBufferWriter& writer,
AdapterBatchT batch,
EllpackDeviceAccessor accessor,
common::Span<FeatureType const> feature_types,
const data::IsValidFunctor& is_valid)
: d_buffer(buffer),
writer(writer),
batch(std::move(batch)),
accessor(std::move(accessor)),
feature_types(std::move(feature_types)),
is_valid(is_valid) {}
common::CompressedByteT* d_buffer;
common::CompressedBufferWriter writer;
AdapterBatchT batch;
EllpackDeviceAccessor accessor;
common::Span<FeatureType const> feature_types;
data::IsValidFunctor is_valid;
using Tuple = thrust::tuple<size_t, size_t, size_t>;
@@ -154,7 +157,12 @@ struct WriteCompressedEllpackFunctor {
// -1 because the scan is inclusive
size_t output_position =
accessor.row_stride * e.row_idx + out.get<1>() - 1;
auto bin_idx = accessor.SearchBin(e.value, e.column_idx);
uint32_t bin_idx = 0;
if (common::IsCat(feature_types, e.column_idx)) {
bin_idx = accessor.SearchBin<true>(e.value, e.column_idx);
} else {
bin_idx = accessor.SearchBin<false>(e.value, e.column_idx);
}
writer.AtomicWriteSymbol(d_buffer, bin_idx, output_position);
}
return 0;
@@ -184,8 +192,9 @@ class TypedDiscard : public thrust::discard_iterator<T> {
// Here the data is already correctly ordered and simply needs to be compacted
// to remove missing data
template <typename AdapterBatchT>
void CopyDataToEllpack(const AdapterBatchT& batch, EllpackPageImpl* dst,
int device_idx, float missing) {
void CopyDataToEllpack(const AdapterBatchT &batch,
common::Span<FeatureType const> feature_types,
EllpackPageImpl *dst, int device_idx, float missing) {
// Some witchcraft happens here
// The goal is to copy valid elements out of the input to an ELLPACK matrix
// with a given row stride, using no extra working memory Standard stream
@@ -220,7 +229,8 @@ void CopyDataToEllpack(const AdapterBatchT& batch, EllpackPageImpl* dst,
// We redirect the scan output into this functor to do the actual writing
WriteCompressedEllpackFunctor<AdapterBatchT> functor(
d_compressed_buffer, writer, batch, device_accessor, is_valid);
d_compressed_buffer, writer, batch, device_accessor, feature_types,
is_valid);
TypedDiscard<Tuple> discard;
thrust::transform_output_iterator<
WriteCompressedEllpackFunctor<AdapterBatchT>, decltype(discard)>
@@ -263,22 +273,22 @@ template <typename AdapterBatch>
EllpackPageImpl::EllpackPageImpl(AdapterBatch batch, float missing, int device,
bool is_dense, int nthread,
common::Span<size_t> row_counts_span,
common::Span<FeatureType const> feature_types,
size_t row_stride, size_t n_rows, size_t n_cols,
common::HistogramCuts const& cuts) {
dh::safe_cuda(cudaSetDevice(device));
*this = EllpackPageImpl(device, cuts, is_dense, row_stride, n_rows);
CopyDataToEllpack(batch, this, device, missing);
CopyDataToEllpack(batch, feature_types, this, device, missing);
WriteNullValues(this, device, row_counts_span);
}
#define ELLPACK_BATCH_SPECIALIZE(__BATCH_T) \
template EllpackPageImpl::EllpackPageImpl( \
__BATCH_T batch, float missing, int device, \
bool is_dense, int nthread, \
common::Span<size_t> row_counts_span, \
size_t row_stride, size_t n_rows, size_t n_cols, \
common::HistogramCuts const& cuts);
#define ELLPACK_BATCH_SPECIALIZE(__BATCH_T) \
template EllpackPageImpl::EllpackPageImpl( \
__BATCH_T batch, float missing, int device, bool is_dense, int nthread, \
common::Span<size_t> row_counts_span, \
common::Span<FeatureType const> feature_types, size_t row_stride, \
size_t n_rows, size_t n_cols, common::HistogramCuts const &cuts);
ELLPACK_BATCH_SPECIALIZE(data::CudfAdapterBatch)
ELLPACK_BATCH_SPECIALIZE(data::CupyAdapterBatch)
@@ -467,11 +477,17 @@ size_t EllpackPageImpl::MemCostBytes(size_t num_rows, size_t row_stride,
return compressed_size_bytes;
}
EllpackDeviceAccessor EllpackPageImpl::GetDeviceAccessor(int device) const {
EllpackDeviceAccessor EllpackPageImpl::GetDeviceAccessor(
int device, common::Span<FeatureType const> feature_types) const {
gidx_buffer.SetDevice(device);
return EllpackDeviceAccessor(
device, cuts_, is_dense, row_stride, base_rowid, n_rows,
common::CompressedIterator<uint32_t>(gidx_buffer.ConstDevicePointer(),
NumSymbols()));
return {device,
cuts_,
is_dense,
row_stride,
base_rowid,
n_rows,
common::CompressedIterator<uint32_t>(gidx_buffer.ConstDevicePointer(),
NumSymbols()),
feature_types};
}
} // namespace xgboost

View File

@@ -10,6 +10,7 @@
#include "../common/compressed_iterator.h"
#include "../common/device_helpers.cuh"
#include "../common/hist_util.h"
#include "../common/categorical.h"
#include <thrust/binary_search.h>
namespace xgboost {
@@ -31,13 +32,17 @@ struct EllpackDeviceAccessor {
/*! \brief Histogram cut values. Size equals to (bins per feature * number of features). */
common::Span<const bst_float> gidx_fvalue_map;
common::Span<const FeatureType> feature_types;
EllpackDeviceAccessor(int device, const common::HistogramCuts& cuts,
bool is_dense, size_t row_stride, size_t base_rowid,
size_t n_rows,common::CompressedIterator<uint32_t> gidx_iter)
size_t n_rows,common::CompressedIterator<uint32_t> gidx_iter,
common::Span<FeatureType const> feature_types)
: is_dense(is_dense),
row_stride(row_stride),
base_rowid(base_rowid),
n_rows(n_rows) ,gidx_iter(gidx_iter){
n_rows(n_rows) ,gidx_iter(gidx_iter),
feature_types{feature_types} {
cuts.cut_values_.SetDevice(device);
cuts.cut_ptrs_.SetDevice(device);
cuts.min_vals_.SetDevice(device);
@@ -64,12 +69,23 @@ struct EllpackDeviceAccessor {
return gidx;
}
template <bool is_cat>
__device__ uint32_t SearchBin(float value, size_t column_id) const {
auto beg = feature_segments[column_id];
auto end = feature_segments[column_id + 1];
auto it =
thrust::upper_bound(thrust::seq, gidx_fvalue_map.cbegin()+ beg, gidx_fvalue_map.cbegin() + end, value);
uint32_t idx = it - gidx_fvalue_map.cbegin();
uint32_t idx = 0;
if (is_cat) {
auto it = dh::MakeTransformIterator<bst_cat_t>(
gidx_fvalue_map.cbegin(), [](float v) { return common::AsCat(v); });
idx = thrust::lower_bound(thrust::seq, it + beg, it + end,
common::AsCat(value)) -
it;
} else {
auto it = thrust::upper_bound(thrust::seq, gidx_fvalue_map.cbegin() + beg,
gidx_fvalue_map.cbegin() + end, value);
idx = it - gidx_fvalue_map.cbegin();
}
if (idx == end) {
idx -= 1;
}
@@ -134,10 +150,12 @@ class EllpackPageImpl {
explicit EllpackPageImpl(DMatrix* dmat, const BatchParam& parm);
template <typename AdapterBatch>
explicit EllpackPageImpl(AdapterBatch batch, float missing, int device, bool is_dense, int nthread,
explicit EllpackPageImpl(AdapterBatch batch, float missing, int device,
bool is_dense, int nthread,
common::Span<size_t> row_counts_span,
common::Span<FeatureType const> feature_types,
size_t row_stride, size_t n_rows, size_t n_cols,
common::HistogramCuts const& cuts);
common::HistogramCuts const &cuts);
/*! \brief Copy the elements of the given ELLPACK page into this page.
*
@@ -176,7 +194,9 @@ class EllpackPageImpl {
* not found). */
size_t NumSymbols() const { return cuts_.TotalBins() + 1; }
EllpackDeviceAccessor GetDeviceAccessor(int device) const;
EllpackDeviceAccessor
GetDeviceAccessor(int device,
common::Span<FeatureType const> feature_types = {}) const;
private:
/*!

View File

@@ -148,9 +148,13 @@ void IterativeDeviceDMatrix::Initialize(DataIterHandle iter_handle, float missin
return GetRowCounts(value, row_counts_span, get_device(), missing);
});
auto is_dense = this->IsDense();
proxy->Info().feature_types.SetDevice(get_device());
auto d_feature_types = proxy->Info().feature_types.ConstDeviceSpan();
auto new_impl = Dispatch(proxy, [&](auto const &value) {
return EllpackPageImpl(value, missing, get_device(), is_dense, nthread,
row_counts_span, row_stride, rows, cols, cuts);
return EllpackPageImpl(value, missing, get_device(), is_dense, nthread,
row_counts_span, d_feature_types, row_stride, rows,
cols, cuts);
});
size_t num_elements = page_->Impl()->Copy(get_device(), &new_impl, offset);
offset += num_elements;

View File

@@ -155,6 +155,9 @@ struct EllpackLoader {
if (gidx == -1) {
return nan("");
}
if (common::IsCat(matrix.feature_types, fidx)) {
return matrix.gidx_fvalue_map[gidx];
}
// The gradient index needs to be shifted by one as min values are not included in the
// cuts.
if (gidx == matrix.feature_segments[fidx]) {
@@ -592,8 +595,10 @@ class GPUPredictor : public xgboost::Predictor {
} else {
size_t batch_offset = 0;
for (auto const& page : dmat->GetBatches<EllpackPage>()) {
dmat->Info().feature_types.SetDevice(generic_param_->gpu_id);
auto feature_types = dmat->Info().feature_types.ConstDeviceSpan();
this->PredictInternal(
page.Impl()->GetDeviceAccessor(generic_param_->gpu_id),
page.Impl()->GetDeviceAccessor(generic_param_->gpu_id, feature_types),
d_model,
out_preds,
batch_offset);