Use std::uint64_t for row index. (#10120)
- Use std::uint64_t instead of size_t to avoid an implementation-defined type.
- Rename bst_row_t to bst_idx_t to account for other types of indexing.
- Small cleanup to the base header.
This commit is contained in:
@@ -73,11 +73,11 @@ constexpr size_t kAdapterUnknownSize = std::numeric_limits<size_t >::max();
|
||||
|
||||
struct COOTuple {
|
||||
COOTuple() = default;
|
||||
XGBOOST_DEVICE COOTuple(size_t row_idx, size_t column_idx, float value)
|
||||
XGBOOST_DEVICE COOTuple(bst_idx_t row_idx, bst_idx_t column_idx, float value)
|
||||
: row_idx(row_idx), column_idx(column_idx), value(value) {}
|
||||
|
||||
size_t row_idx{0};
|
||||
size_t column_idx{0};
|
||||
bst_idx_t row_idx{0};
|
||||
bst_idx_t column_idx{0};
|
||||
float value{0};
|
||||
};
|
||||
|
||||
@@ -136,12 +136,8 @@ class CSRAdapterBatch : public detail::NoMetaInfo {
|
||||
public:
|
||||
class Line {
|
||||
public:
|
||||
Line(size_t row_idx, size_t size, const unsigned* feature_idx,
|
||||
const float* values)
|
||||
: row_idx_(row_idx),
|
||||
size_(size),
|
||||
feature_idx_(feature_idx),
|
||||
values_(values) {}
|
||||
Line(bst_idx_t row_idx, bst_idx_t size, const unsigned* feature_idx, const float* values)
|
||||
: row_idx_(row_idx), size_(size), feature_idx_(feature_idx), values_(values) {}
|
||||
|
||||
size_t Size() const { return size_; }
|
||||
COOTuple GetElement(size_t idx) const {
|
||||
@@ -149,8 +145,8 @@ class CSRAdapterBatch : public detail::NoMetaInfo {
|
||||
}
|
||||
|
||||
private:
|
||||
size_t row_idx_;
|
||||
size_t size_;
|
||||
bst_idx_t row_idx_;
|
||||
bst_idx_t size_;
|
||||
const unsigned* feature_idx_;
|
||||
const float* values_;
|
||||
};
|
||||
@@ -178,29 +174,25 @@ class CSRAdapterBatch : public detail::NoMetaInfo {
|
||||
|
||||
class CSRAdapter : public detail::SingleBatchDataIter<CSRAdapterBatch> {
|
||||
public:
|
||||
CSRAdapter(const size_t* row_ptr, const unsigned* feature_idx,
|
||||
const float* values, size_t num_rows, size_t num_elements,
|
||||
size_t num_features)
|
||||
: batch_(row_ptr, feature_idx, values, num_rows, num_elements,
|
||||
num_features),
|
||||
CSRAdapter(const size_t* row_ptr, const unsigned* feature_idx, const float* values,
|
||||
bst_idx_t num_rows, bst_idx_t num_elements, size_t num_features)
|
||||
: batch_(row_ptr, feature_idx, values, num_rows, num_elements, num_features),
|
||||
num_rows_(num_rows),
|
||||
num_columns_(num_features) {}
|
||||
const CSRAdapterBatch& Value() const override { return batch_; }
|
||||
size_t NumRows() const { return num_rows_; }
|
||||
size_t NumColumns() const { return num_columns_; }
|
||||
bst_idx_t NumRows() const { return num_rows_; }
|
||||
bst_idx_t NumColumns() const { return num_columns_; }
|
||||
|
||||
private:
|
||||
CSRAdapterBatch batch_;
|
||||
size_t num_rows_;
|
||||
size_t num_columns_;
|
||||
bst_idx_t num_rows_;
|
||||
bst_idx_t num_columns_;
|
||||
};
|
||||
|
||||
class DenseAdapterBatch : public detail::NoMetaInfo {
|
||||
public:
|
||||
DenseAdapterBatch(const float* values, size_t num_rows, size_t num_features)
|
||||
: values_(values),
|
||||
num_rows_(num_rows),
|
||||
num_features_(num_features) {}
|
||||
DenseAdapterBatch(const float* values, bst_idx_t num_rows, bst_idx_t num_features)
|
||||
: values_(values), num_rows_(num_rows), num_features_(num_features) {}
|
||||
|
||||
private:
|
||||
class Line {
|
||||
@@ -910,7 +902,7 @@ class SparsePageAdapterBatch {
|
||||
struct Line {
|
||||
Entry const* inst;
|
||||
size_t n;
|
||||
bst_row_t ridx;
|
||||
bst_idx_t ridx;
|
||||
COOTuple GetElement(size_t idx) const { return {ridx, inst[idx].index, inst[idx].fvalue}; }
|
||||
size_t Size() const { return n; }
|
||||
};
|
||||
|
||||
@@ -47,7 +47,7 @@
|
||||
#include "simple_dmatrix.h" // for SimpleDMatrix
|
||||
#include "sparse_page_writer.h" // for SparsePageFormatReg
|
||||
#include "validation.h" // for LabelsCheck, WeightsCheck, ValidateQueryGroup
|
||||
#include "xgboost/base.h" // for bst_group_t, bst_row_t, bst_float, bst_ulong
|
||||
#include "xgboost/base.h" // for bst_group_t, bst_idx_t, bst_float, bst_ulong
|
||||
#include "xgboost/context.h" // for Context
|
||||
#include "xgboost/host_device_vector.h" // for HostDeviceVector
|
||||
#include "xgboost/learner.h" // for HostDeviceVector
|
||||
@@ -996,7 +996,7 @@ template DMatrix* DMatrix::Create(
|
||||
|
||||
SparsePage SparsePage::GetTranspose(int num_columns, int32_t n_threads) const {
|
||||
SparsePage transpose;
|
||||
common::ParallelGroupBuilder<Entry, bst_row_t> builder(&transpose.offset.HostVector(),
|
||||
common::ParallelGroupBuilder<Entry, bst_idx_t> builder(&transpose.offset.HostVector(),
|
||||
&transpose.data.HostVector());
|
||||
builder.InitBudget(num_columns, n_threads);
|
||||
long batch_size = static_cast<long>(this->Size()); // NOLINT(*)
|
||||
@@ -1192,7 +1192,7 @@ uint64_t SparsePage::Push(const AdapterBatchT& batch, float missing, int nthread
|
||||
|
||||
void SparsePage::PushCSC(const SparsePage &batch) {
|
||||
std::vector<xgboost::Entry>& self_data = data.HostVector();
|
||||
std::vector<bst_row_t>& self_offset = offset.HostVector();
|
||||
std::vector<bst_idx_t>& self_offset = offset.HostVector();
|
||||
|
||||
auto const& other_data = batch.data.ConstHostVector();
|
||||
auto const& other_offset = batch.offset.ConstHostVector();
|
||||
@@ -1211,7 +1211,7 @@ void SparsePage::PushCSC(const SparsePage &batch) {
|
||||
return;
|
||||
}
|
||||
|
||||
std::vector<bst_row_t> offset(other_offset.size());
|
||||
std::vector<bst_idx_t> offset(other_offset.size());
|
||||
offset[0] = 0;
|
||||
|
||||
std::vector<xgboost::Entry> data(self_data.size() + other_data.size());
|
||||
|
||||
@@ -39,7 +39,7 @@ class CudfAdapterBatch : public detail::NoMetaInfo {
|
||||
return {row_idx, column_idx, value};
|
||||
}
|
||||
|
||||
[[nodiscard]] __device__ float GetElement(bst_row_t ridx, bst_feature_t fidx) const {
|
||||
[[nodiscard]] __device__ float GetElement(bst_idx_t ridx, bst_feature_t fidx) const {
|
||||
auto const& column = columns_[fidx];
|
||||
float value = column.valid.Data() == nullptr || column.valid.Check(ridx)
|
||||
? column(ridx)
|
||||
@@ -47,8 +47,8 @@ class CudfAdapterBatch : public detail::NoMetaInfo {
|
||||
return value;
|
||||
}
|
||||
|
||||
[[nodiscard]] XGBOOST_DEVICE bst_row_t NumRows() const { return num_rows_; }
|
||||
[[nodiscard]] XGBOOST_DEVICE bst_row_t NumCols() const { return columns_.size(); }
|
||||
[[nodiscard]] XGBOOST_DEVICE bst_idx_t NumRows() const { return num_rows_; }
|
||||
[[nodiscard]] XGBOOST_DEVICE bst_idx_t NumCols() const { return columns_.size(); }
|
||||
|
||||
private:
|
||||
common::Span<ArrayInterface<1>> columns_;
|
||||
@@ -168,13 +168,13 @@ class CupyAdapterBatch : public detail::NoMetaInfo {
|
||||
float value = array_interface_(row_idx, column_idx);
|
||||
return {row_idx, column_idx, value};
|
||||
}
|
||||
[[nodiscard]] __device__ float GetElement(bst_row_t ridx, bst_feature_t fidx) const {
|
||||
[[nodiscard]] __device__ float GetElement(bst_idx_t ridx, bst_feature_t fidx) const {
|
||||
float value = array_interface_(ridx, fidx);
|
||||
return value;
|
||||
}
|
||||
|
||||
[[nodiscard]] XGBOOST_DEVICE bst_row_t NumRows() const { return array_interface_.Shape(0); }
|
||||
[[nodiscard]] XGBOOST_DEVICE bst_row_t NumCols() const { return array_interface_.Shape(1); }
|
||||
[[nodiscard]] XGBOOST_DEVICE bst_idx_t NumRows() const { return array_interface_.Shape(0); }
|
||||
[[nodiscard]] XGBOOST_DEVICE bst_idx_t NumCols() const { return array_interface_.Shape(1); }
|
||||
|
||||
private:
|
||||
ArrayInterface<2> array_interface_;
|
||||
@@ -208,8 +208,8 @@ class CupyAdapter : public detail::SingleBatchDataIter<CupyAdapterBatch> {
|
||||
|
||||
// Returns maximum row length
|
||||
template <typename AdapterBatchT>
|
||||
std::size_t GetRowCounts(const AdapterBatchT batch, common::Span<bst_row_t> offset, DeviceOrd device,
|
||||
float missing) {
|
||||
bst_idx_t GetRowCounts(const AdapterBatchT batch, common::Span<bst_idx_t> offset, DeviceOrd device,
|
||||
float missing) {
|
||||
dh::safe_cuda(cudaSetDevice(device.ordinal));
|
||||
IsValidFunctor is_valid(missing);
|
||||
dh::safe_cuda(cudaMemsetAsync(offset.data(), '\0', offset.size_bytes()));
|
||||
@@ -231,7 +231,7 @@ std::size_t GetRowCounts(const AdapterBatchT batch, common::Span<bst_row_t> offs
|
||||
|
||||
// Count elements per row
|
||||
dh::LaunchN(n_samples * stride, [=] __device__(std::size_t idx) {
|
||||
bst_row_t cnt{0};
|
||||
bst_idx_t cnt{0};
|
||||
auto [ridx, fbeg] = linalg::UnravelIndex(idx, n_samples, stride);
|
||||
SPAN_CHECK(ridx < n_samples);
|
||||
for (bst_feature_t fidx = fbeg; fidx < n_features; fidx += stride) {
|
||||
@@ -245,10 +245,10 @@ std::size_t GetRowCounts(const AdapterBatchT batch, common::Span<bst_row_t> offs
|
||||
static_cast<unsigned long long>(cnt)); // NOLINT
|
||||
});
|
||||
dh::XGBCachingDeviceAllocator<char> alloc;
|
||||
bst_row_t row_stride =
|
||||
bst_idx_t row_stride =
|
||||
dh::Reduce(thrust::cuda::par(alloc), thrust::device_pointer_cast(offset.data()),
|
||||
thrust::device_pointer_cast(offset.data()) + offset.size(),
|
||||
static_cast<bst_row_t>(0), thrust::maximum<bst_row_t>());
|
||||
static_cast<bst_idx_t>(0), thrust::maximum<bst_idx_t>());
|
||||
return row_stride;
|
||||
}
|
||||
|
||||
|
||||
@@ -193,7 +193,7 @@ float GHistIndexMatrix::GetFvalue(size_t ridx, size_t fidx, bool is_cat) const {
|
||||
|
||||
float GHistIndexMatrix::GetFvalue(std::vector<std::uint32_t> const &ptrs,
|
||||
std::vector<float> const &values, std::vector<float> const &mins,
|
||||
bst_row_t ridx, bst_feature_t fidx, bool is_cat) const {
|
||||
bst_idx_t ridx, bst_feature_t fidx, bool is_cat) const {
|
||||
if (is_cat) {
|
||||
auto gidx = GetGindex(ridx, fidx);
|
||||
if (gidx == -1) {
|
||||
|
||||
@@ -149,7 +149,7 @@ class GHistIndexMatrix {
|
||||
/** @brief max_bin for each feature. */
|
||||
bst_bin_t max_numeric_bins_per_feat;
|
||||
/** @brief base row index for current page (used by external memory) */
|
||||
bst_row_t base_rowid{0};
|
||||
bst_idx_t base_rowid{0};
|
||||
|
||||
[[nodiscard]] bst_bin_t MaxNumBinPerFeat() const {
|
||||
return std::max(static_cast<bst_bin_t>(cut.MaxCategory() + 1), max_numeric_bins_per_feat);
|
||||
@@ -230,7 +230,7 @@ class GHistIndexMatrix {
|
||||
*/
|
||||
[[nodiscard]] std::size_t RowIdx(size_t ridx) const { return row_ptr[ridx - base_rowid]; }
|
||||
|
||||
[[nodiscard]] bst_row_t Size() const { return row_ptr.empty() ? 0 : row_ptr.size() - 1; }
|
||||
[[nodiscard]] bst_idx_t Size() const { return row_ptr.empty() ? 0 : row_ptr.size() - 1; }
|
||||
[[nodiscard]] bst_feature_t Features() const { return cut.Ptrs().size() - 1; }
|
||||
|
||||
[[nodiscard]] bool ReadColumnPage(common::AlignedResourceReadStream* fi);
|
||||
@@ -243,7 +243,7 @@ class GHistIndexMatrix {
|
||||
[[nodiscard]] float GetFvalue(size_t ridx, size_t fidx, bool is_cat) const;
|
||||
[[nodiscard]] float GetFvalue(std::vector<std::uint32_t> const& ptrs,
|
||||
std::vector<float> const& values, std::vector<float> const& mins,
|
||||
bst_row_t ridx, bst_feature_t fidx, bool is_cat) const;
|
||||
bst_idx_t ridx, bst_feature_t fidx, bool is_cat) const;
|
||||
|
||||
[[nodiscard]] common::HistogramCuts& Cuts() { return cut; }
|
||||
[[nodiscard]] common::HistogramCuts const& Cuts() const { return cut; }
|
||||
|
||||
@@ -132,7 +132,7 @@ void IterativeDMatrix::InitFromCPU(Context const* ctx, BatchParam const& p,
|
||||
return HostAdapterDispatch(proxy, [](auto const& value) { return value.NumCols(); });
|
||||
};
|
||||
|
||||
std::vector<std::size_t> column_sizes;
|
||||
std::vector<bst_idx_t> column_sizes;
|
||||
auto const is_valid = data::IsValidFunctor{missing};
|
||||
auto nnz_cnt = [&]() {
|
||||
return HostAdapterDispatch(proxy, [&](auto const& value) {
|
||||
|
||||
@@ -59,7 +59,7 @@ DMatrix* SimpleDMatrix::SliceCol(int num_slices, int slice_id) {
|
||||
auto& h_data = out_page.data.HostVector();
|
||||
auto& h_offset = out_page.offset.HostVector();
|
||||
size_t rptr{0};
|
||||
for (bst_row_t i = 0; i < this->Info().num_row_; i++) {
|
||||
for (bst_idx_t i = 0; i < this->Info().num_row_; i++) {
|
||||
auto inst = batch[i];
|
||||
auto prev_size = h_data.size();
|
||||
std::copy_if(inst.begin(), inst.end(), std::back_inserter(h_data),
|
||||
|
||||
@@ -40,7 +40,7 @@ void CopyDataToDMatrix(AdapterBatchT batch, common::Span<Entry> data,
|
||||
}
|
||||
|
||||
template <typename AdapterBatchT>
|
||||
void CountRowOffsets(const AdapterBatchT& batch, common::Span<bst_row_t> offset, DeviceOrd device,
|
||||
void CountRowOffsets(const AdapterBatchT& batch, common::Span<bst_idx_t> offset, DeviceOrd device,
|
||||
float missing) {
|
||||
dh::safe_cuda(cudaSetDevice(device.ordinal));
|
||||
IsValidFunctor is_valid(missing);
|
||||
|
||||
Reference in New Issue
Block a user