Fix crash with approx tree method on cpu (#4510)

This commit is contained in:
sriramch 2019-05-29 10:11:29 -07:00 committed by Jiaming Yuan
parent c589eff941
commit 6e16900711
2 changed files with 32 additions and 3 deletions

View File

@ -376,7 +376,7 @@ void SparsePage::PushCSC(const SparsePage &batch) {
std::vector<size_t> offset(other_offset.size());
offset[0] = 0;
std::vector<xgboost::Entry> data(self_data.size() + batch.data.Size());
std::vector<xgboost::Entry> data(self_data.size() + other_data.size());
// n_cols in original csr data matrix, here in csc is n_rows
size_t const n_features = other_offset.size() - 1;
@ -385,7 +385,11 @@ void SparsePage::PushCSC(const SparsePage &batch) {
for (size_t i = 0; i < n_features; ++i) {
size_t const self_beg = self_offset.at(i);
size_t const self_length = self_offset.at(i+1) - self_beg;
CHECK_LT(beg, data.size());
// It is possible that the current feature and further features aren't referenced
// in any rows accumulated thus far. It is also possible for this to happen
// in the current sparse page row batch as well.
// Hence, the incremental number of rows may stay constant thus equaling the data size
CHECK_LE(beg, data.size());
std::memcpy(dmlc::BeginPtr(data)+beg,
dmlc::BeginPtr(self_data) + self_beg,
sizeof(Entry) * self_length);
@ -393,7 +397,7 @@ void SparsePage::PushCSC(const SparsePage &batch) {
size_t const other_beg = other_offset.at(i);
size_t const other_length = other_offset.at(i+1) - other_beg;
CHECK_LT(beg, data.size());
CHECK_LE(beg, data.size());
std::memcpy(dmlc::BeginPtr(data)+beg,
dmlc::BeginPtr(other_data) + other_beg,
sizeof(Entry) * other_length);

View File

@ -2,6 +2,7 @@
#include <vector>
#include "xgboost/data.h"
#include "../helpers.h"
namespace xgboost {
TEST(SparsePage, PushCSC) {
@ -52,4 +53,28 @@ TEST(SparsePage, PushCSC) {
ASSERT_EQ(inst[i].index, indices_sol[i % 3]);
}
}
TEST(SparsePage, PushCSCAfterTranspose) {
const int n_entries = 9;
std::unique_ptr<DMatrix> dmat = CreateSparsePageDMatrix(n_entries, 64UL);
const int ncols = dmat->Info().num_col_;
SparsePage page; // Consolidated sparse page
for (const auto &batch : dmat->GetRowBatches()) {
// Transpose each batch and push
SparsePage tmp = batch.GetTranspose(ncols);
page.PushCSC(tmp);
}
// Make sure that the final sparse page has the right number of entries
ASSERT_EQ(n_entries, page.data.Size());
// The feature value for a feature in each row should be identical, as that is
// how the dmatrix has been created
for (int i = 0; i < page.Size(); ++i) {
auto inst = page[i];
for (int j = 1; j < inst.size(); ++j) {
ASSERT_EQ(inst[0].fvalue, inst[j].fvalue);
}
}
}
} // namespace xgboost