Fix crash with approx tree method on cpu (#4510)
This commit is contained in:
parent
c589eff941
commit
6e16900711
@ -376,7 +376,7 @@ void SparsePage::PushCSC(const SparsePage &batch) {
|
||||
std::vector<size_t> offset(other_offset.size());
|
||||
offset[0] = 0;
|
||||
|
||||
std::vector<xgboost::Entry> data(self_data.size() + batch.data.Size());
|
||||
std::vector<xgboost::Entry> data(self_data.size() + other_data.size());
|
||||
|
||||
// n_cols in original csr data matrix, here in csc is n_rows
|
||||
size_t const n_features = other_offset.size() - 1;
|
||||
@ -385,7 +385,11 @@ void SparsePage::PushCSC(const SparsePage &batch) {
|
||||
for (size_t i = 0; i < n_features; ++i) {
|
||||
size_t const self_beg = self_offset.at(i);
|
||||
size_t const self_length = self_offset.at(i+1) - self_beg;
|
||||
CHECK_LT(beg, data.size());
|
||||
// It is possible that the current feature and further features aren't referenced
|
||||
// in any rows accumulated thus far. It is also possible for this to happen
|
||||
// in the current sparse page row batch as well.
|
||||
// Hence, the incremental number of rows may stay constant thus equaling the data size
|
||||
CHECK_LE(beg, data.size());
|
||||
std::memcpy(dmlc::BeginPtr(data)+beg,
|
||||
dmlc::BeginPtr(self_data) + self_beg,
|
||||
sizeof(Entry) * self_length);
|
||||
@ -393,7 +397,7 @@ void SparsePage::PushCSC(const SparsePage &batch) {
|
||||
|
||||
size_t const other_beg = other_offset.at(i);
|
||||
size_t const other_length = other_offset.at(i+1) - other_beg;
|
||||
CHECK_LT(beg, data.size());
|
||||
CHECK_LE(beg, data.size());
|
||||
std::memcpy(dmlc::BeginPtr(data)+beg,
|
||||
dmlc::BeginPtr(other_data) + other_beg,
|
||||
sizeof(Entry) * other_length);
|
||||
|
||||
@ -2,6 +2,7 @@
|
||||
#include <vector>
|
||||
|
||||
#include "xgboost/data.h"
|
||||
#include "../helpers.h"
|
||||
|
||||
namespace xgboost {
|
||||
TEST(SparsePage, PushCSC) {
|
||||
@ -52,4 +53,28 @@ TEST(SparsePage, PushCSC) {
|
||||
ASSERT_EQ(inst[i].index, indices_sol[i % 3]);
|
||||
}
|
||||
}
|
||||
|
||||
TEST(SparsePage, PushCSCAfterTranspose) {
|
||||
const int n_entries = 9;
|
||||
std::unique_ptr<DMatrix> dmat = CreateSparsePageDMatrix(n_entries, 64UL);
|
||||
const int ncols = dmat->Info().num_col_;
|
||||
SparsePage page; // Consolidated sparse page
|
||||
for (const auto &batch : dmat->GetRowBatches()) {
|
||||
// Transpose each batch and push
|
||||
SparsePage tmp = batch.GetTranspose(ncols);
|
||||
page.PushCSC(tmp);
|
||||
}
|
||||
|
||||
// Make sure that the final sparse page has the right number of entries
|
||||
ASSERT_EQ(n_entries, page.data.Size());
|
||||
|
||||
// The feature value for a feature in each row should be identical, as that is
|
||||
// how the dmatrix has been created
|
||||
for (int i = 0; i < page.Size(); ++i) {
|
||||
auto inst = page[i];
|
||||
for (int j = 1; j < inst.size(); ++j) {
|
||||
ASSERT_EQ(inst[0].fvalue, inst[j].fvalue);
|
||||
}
|
||||
}
|
||||
}
|
||||
} // namespace xgboost
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user