Remove column major specialization. (#5755)
Co-authored-by: Hyunsu Cho <chohyu01@cs.washington.edu>
This commit is contained in:
@@ -34,36 +34,27 @@ struct IsValidFunctor : public thrust::unary_function<Entry, bool> {
|
||||
};
|
||||
|
||||
class CudfAdapterBatch : public detail::NoMetaInfo {
|
||||
friend class CudfAdapter;
|
||||
|
||||
public:
|
||||
CudfAdapterBatch() = default;
|
||||
CudfAdapterBatch(common::Span<ArrayInterface> columns,
|
||||
common::Span<size_t> column_ptr, size_t num_elements)
|
||||
CudfAdapterBatch(common::Span<ArrayInterface> columns, size_t num_rows)
|
||||
: columns_(columns),
|
||||
column_ptr_(column_ptr),
|
||||
num_elements_(num_elements) {}
|
||||
size_t Size() const { return num_elements_; }
|
||||
num_rows_(num_rows) {}
|
||||
size_t Size() const { return num_rows_ * columns_.size(); }
|
||||
__device__ COOTuple GetElement(size_t idx) const {
|
||||
size_t column_idx =
|
||||
thrust::upper_bound(thrust::seq,column_ptr_.begin(), column_ptr_.end(), idx) - column_ptr_.begin() - 1;
|
||||
auto& column = columns_[column_idx];
|
||||
size_t row_idx = idx - column_ptr_[column_idx];
|
||||
size_t column_idx = idx % columns_.size();
|
||||
size_t row_idx = idx / columns_.size();
|
||||
auto const& column = columns_[column_idx];
|
||||
float value = column.valid.Data() == nullptr || column.valid.Check(row_idx)
|
||||
? column.GetElement(row_idx)
|
||||
: std::numeric_limits<float>::quiet_NaN();
|
||||
return {row_idx, column_idx, value};
|
||||
}
|
||||
__device__ float GetValue(size_t ridx, bst_feature_t fidx) const {
|
||||
auto const& column = columns_[fidx];
|
||||
float value = column.valid.Data() == nullptr || column.valid.Check(ridx)
|
||||
? column.GetElement(ridx)
|
||||
: std::numeric_limits<float>::quiet_NaN();
|
||||
return value;
|
||||
}
|
||||
|
||||
private:
|
||||
common::Span<ArrayInterface> columns_;
|
||||
common::Span<size_t> column_ptr_;
|
||||
size_t num_elements_;
|
||||
size_t num_rows_;
|
||||
};
|
||||
|
||||
/*!
|
||||
@@ -127,7 +118,6 @@ class CudfAdapter : public detail::SingleBatchDataIter<CudfAdapterBatch> {
|
||||
CHECK_EQ(typestr.size(), 3) << ArrayInterfaceErrors::TypestrFormat();
|
||||
CHECK_NE(typestr.front(), '>') << ArrayInterfaceErrors::BigEndian();
|
||||
std::vector<ArrayInterface> columns;
|
||||
std::vector<size_t> column_ptr({0});
|
||||
auto first_column = ArrayInterface(get<Object const>(json_columns[0]));
|
||||
device_idx_ = dh::CudaGetPointerDevice(first_column.data);
|
||||
CHECK_NE(device_idx_, -1);
|
||||
@@ -137,7 +127,6 @@ class CudfAdapter : public detail::SingleBatchDataIter<CudfAdapterBatch> {
|
||||
auto column = ArrayInterface(get<Object const>(json_col));
|
||||
columns.push_back(column);
|
||||
CHECK_EQ(column.num_cols, 1);
|
||||
column_ptr.emplace_back(column_ptr.back() + column.num_rows);
|
||||
num_rows_ = std::max(num_rows_, size_t(column.num_rows));
|
||||
CHECK_EQ(device_idx_, dh::CudaGetPointerDevice(column.data))
|
||||
<< "All columns should use the same device.";
|
||||
@@ -145,23 +134,20 @@ class CudfAdapter : public detail::SingleBatchDataIter<CudfAdapterBatch> {
|
||||
<< "All columns should have same number of rows.";
|
||||
}
|
||||
columns_ = columns;
|
||||
column_ptr_ = column_ptr;
|
||||
batch_ = CudfAdapterBatch(dh::ToSpan(columns_), dh::ToSpan(column_ptr_),
|
||||
column_ptr.back());
|
||||
batch_ = CudfAdapterBatch(dh::ToSpan(columns_), num_rows_);
|
||||
}
|
||||
const CudfAdapterBatch& Value() const override {
|
||||
CHECK_EQ(batch_.columns_.data(), columns_.data().get());
|
||||
return batch_;
|
||||
}
|
||||
const CudfAdapterBatch& Value() const override { return batch_; }
|
||||
|
||||
size_t NumRows() const { return num_rows_; }
|
||||
size_t NumColumns() const { return columns_.size(); }
|
||||
size_t DeviceIdx() const { return device_idx_; }
|
||||
|
||||
// Cudf is column major
|
||||
bool IsRowMajor() { return false; }
|
||||
|
||||
private:
|
||||
CudfAdapterBatch batch_;
|
||||
dh::device_vector<ArrayInterface> columns_;
|
||||
dh::device_vector<size_t> column_ptr_; // Exclusive scan of column sizes
|
||||
size_t num_rows_{0};
|
||||
int device_idx_;
|
||||
};
|
||||
@@ -201,8 +187,6 @@ class CupyAdapter : public detail::SingleBatchDataIter<CupyAdapterBatch> {
|
||||
size_t NumColumns() const { return array_interface_.num_cols; }
|
||||
size_t DeviceIdx() const { return device_idx_; }
|
||||
|
||||
bool IsRowMajor() { return true; }
|
||||
|
||||
private:
|
||||
ArrayInterface array_interface_;
|
||||
CupyAdapterBatch batch_;
|
||||
|
||||
@@ -154,8 +154,8 @@ struct WriteCompressedEllpackFunctor {
|
||||
// Here the data is already correctly ordered and simply needs to be compacted
|
||||
// to remove missing data
|
||||
template <typename AdapterBatchT>
|
||||
void CopyDataRowMajor(const AdapterBatchT& batch, EllpackPageImpl* dst,
|
||||
int device_idx, float missing) {
|
||||
void CopyDataToEllpack(const AdapterBatchT& batch, EllpackPageImpl* dst,
|
||||
int device_idx, float missing) {
|
||||
// Some witchcraft happens here
|
||||
// The goal is to copy valid elements out of the input to an ellpack matrix
|
||||
// with a given row stride, using no extra working memory Standard stream
|
||||
@@ -209,51 +209,6 @@ void CopyDataRowMajor(const AdapterBatchT& batch, EllpackPageImpl* dst,
|
||||
});
|
||||
}
|
||||
|
||||
template <typename AdapterT, typename AdapterBatchT>
|
||||
void CopyDataColumnMajor(AdapterT* adapter, const AdapterBatchT& batch,
|
||||
EllpackPageImpl* dst, float missing) {
|
||||
// Step 1: Get the sizes of the input columns
|
||||
dh::caching_device_vector<size_t> column_sizes(adapter->NumColumns(), 0);
|
||||
auto d_column_sizes = column_sizes.data().get();
|
||||
// Populate column sizes
|
||||
dh::LaunchN(adapter->DeviceIdx(), batch.Size(), [=] __device__(size_t idx) {
|
||||
const auto& e = batch.GetElement(idx);
|
||||
atomicAdd(reinterpret_cast<unsigned long long*>( // NOLINT
|
||||
&d_column_sizes[e.column_idx]),
|
||||
static_cast<unsigned long long>(1)); // NOLINT
|
||||
});
|
||||
|
||||
thrust::host_vector<size_t> host_column_sizes = column_sizes;
|
||||
|
||||
// Step 2: Iterate over columns, place elements in correct row, increment
|
||||
// temporary row pointers
|
||||
dh::caching_device_vector<size_t> temp_row_ptr(adapter->NumRows(), 0);
|
||||
auto d_temp_row_ptr = temp_row_ptr.data().get();
|
||||
auto row_stride = dst->row_stride;
|
||||
size_t begin = 0;
|
||||
auto device_accessor = dst->GetDeviceAccessor(adapter->DeviceIdx());
|
||||
common::CompressedBufferWriter writer(device_accessor.NumSymbols());
|
||||
auto d_compressed_buffer = dst->gidx_buffer.DevicePointer();
|
||||
data::IsValidFunctor is_valid(missing);
|
||||
for (auto size : host_column_sizes) {
|
||||
size_t end = begin + size;
|
||||
dh::LaunchN(adapter->DeviceIdx(), end - begin, [=] __device__(size_t idx) {
|
||||
auto writer_non_const =
|
||||
writer; // For some reason this variable gets captured as const
|
||||
const auto& e = batch.GetElement(idx + begin);
|
||||
if (!is_valid(e)) return;
|
||||
size_t output_position =
|
||||
e.row_idx * row_stride + d_temp_row_ptr[e.row_idx];
|
||||
auto bin_idx = device_accessor.SearchBin(e.value, e.column_idx);
|
||||
writer_non_const.AtomicWriteSymbol(d_compressed_buffer, bin_idx,
|
||||
output_position);
|
||||
d_temp_row_ptr[e.row_idx] += 1;
|
||||
});
|
||||
|
||||
begin = end;
|
||||
}
|
||||
}
|
||||
|
||||
void WriteNullValues(EllpackPageImpl* dst, int device_idx,
|
||||
common::Span<size_t> row_counts) {
|
||||
// Write the null values
|
||||
@@ -284,12 +239,7 @@ EllpackPageImpl::EllpackPageImpl(AdapterT* adapter, float missing, bool is_dense
|
||||
|
||||
*this = EllpackPageImpl(adapter->DeviceIdx(), cuts, is_dense, row_stride,
|
||||
adapter->NumRows());
|
||||
if (adapter->IsRowMajor()) {
|
||||
CopyDataRowMajor(batch, this, adapter->DeviceIdx(), missing);
|
||||
} else {
|
||||
CopyDataColumnMajor(adapter, batch, this, missing);
|
||||
}
|
||||
|
||||
CopyDataToEllpack(batch, this, adapter->DeviceIdx(), missing);
|
||||
WriteNullValues(this, adapter->DeviceIdx(), row_counts_span);
|
||||
}
|
||||
|
||||
|
||||
@@ -35,51 +35,12 @@ void CountRowOffsets(const AdapterBatchT& batch, common::Span<bst_row_t> offset,
|
||||
thrust::device_pointer_cast(offset.data()));
|
||||
}
|
||||
|
||||
template <typename AdapterT>
|
||||
void CopyDataColumnMajor(AdapterT* adapter, common::Span<Entry> data,
|
||||
int device_idx, float missing,
|
||||
common::Span<size_t> row_ptr) {
|
||||
// Step 1: Get the sizes of the input columns
|
||||
dh::device_vector<size_t> column_sizes(adapter->NumColumns());
|
||||
auto d_column_sizes = column_sizes.data().get();
|
||||
auto& batch = adapter->Value();
|
||||
// Populate column sizes
|
||||
dh::LaunchN(device_idx, batch.Size(), [=] __device__(size_t idx) {
|
||||
const auto& e = batch.GetElement(idx);
|
||||
atomicAdd(reinterpret_cast<unsigned long long*>( // NOLINT
|
||||
&d_column_sizes[e.column_idx]),
|
||||
static_cast<unsigned long long>(1)); // NOLINT
|
||||
});
|
||||
|
||||
thrust::host_vector<size_t> host_column_sizes = column_sizes;
|
||||
|
||||
// Step 2: Iterate over columns, place elements in correct row, increment
|
||||
// temporary row pointers
|
||||
dh::device_vector<size_t> temp_row_ptr(
|
||||
thrust::device_pointer_cast(row_ptr.data()),
|
||||
thrust::device_pointer_cast(row_ptr.data() + row_ptr.size()));
|
||||
auto d_temp_row_ptr = temp_row_ptr.data().get();
|
||||
size_t begin = 0;
|
||||
IsValidFunctor is_valid(missing);
|
||||
for (auto size : host_column_sizes) {
|
||||
size_t end = begin + size;
|
||||
dh::LaunchN(device_idx, end - begin, [=] __device__(size_t idx) {
|
||||
const auto& e = batch.GetElement(idx + begin);
|
||||
if (!is_valid(e)) return;
|
||||
data[d_temp_row_ptr[e.row_idx]] = Entry(e.column_idx, e.value);
|
||||
d_temp_row_ptr[e.row_idx] += 1;
|
||||
});
|
||||
|
||||
begin = end;
|
||||
}
|
||||
}
|
||||
|
||||
// Here the data is already correctly ordered and simply needs to be compacted
|
||||
// to remove missing data
|
||||
template <typename AdapterT>
|
||||
void CopyDataRowMajor(AdapterT* adapter, common::Span<Entry> data,
|
||||
int device_idx, float missing,
|
||||
common::Span<size_t> row_ptr) {
|
||||
void CopyDataToDMatrix(AdapterT* adapter, common::Span<Entry> data,
|
||||
int device_idx, float missing,
|
||||
common::Span<size_t> row_ptr) {
|
||||
auto& batch = adapter->Value();
|
||||
auto transform_f = [=] __device__(size_t idx) {
|
||||
const auto& e = batch.GetElement(idx);
|
||||
@@ -116,13 +77,8 @@ SimpleDMatrix::SimpleDMatrix(AdapterT* adapter, float missing, int nthread) {
|
||||
CountRowOffsets(batch, s_offset, adapter->DeviceIdx(), missing);
|
||||
info_.num_nonzero_ = sparse_page_.offset.HostVector().back();
|
||||
sparse_page_.data.Resize(info_.num_nonzero_);
|
||||
if (adapter->IsRowMajor()) {
|
||||
CopyDataRowMajor(adapter, sparse_page_.data.DeviceSpan(),
|
||||
adapter->DeviceIdx(), missing, s_offset);
|
||||
} else {
|
||||
CopyDataColumnMajor(adapter, sparse_page_.data.DeviceSpan(),
|
||||
adapter->DeviceIdx(), missing, s_offset);
|
||||
}
|
||||
CopyDataToDMatrix(adapter, sparse_page_.data.DeviceSpan(),
|
||||
adapter->DeviceIdx(), missing, s_offset);
|
||||
|
||||
info_.num_col_ = adapter->NumColumns();
|
||||
info_.num_row_ = adapter->NumRows();
|
||||
|
||||
Reference in New Issue
Block a user