Add PushCSC for SparsePage. (#4193)
* Add PushCSC for SparsePage. * Move Push* definitions into cc file. * Add `std::` prefix to `size_t` to make clang++ happy. * Address monitor count == 0.
This commit is contained in:
55
tests/cpp/data/test_data.cc
Normal file
55
tests/cpp/data/test_data.cc
Normal file
@@ -0,0 +1,55 @@
|
||||
#include <gtest/gtest.h>
|
||||
#include <vector>
|
||||
|
||||
#include "xgboost/data.h"
|
||||
|
||||
namespace xgboost {
|
||||
TEST(SparsePage, PushCSC) {
|
||||
std::vector<size_t> offset {0};
|
||||
std::vector<Entry> data;
|
||||
SparsePage page;
|
||||
page.offset.HostVector() = offset;
|
||||
page.data.HostVector() = data;
|
||||
|
||||
offset = {0, 1, 4};
|
||||
for (size_t i = 0; i < offset.back(); ++i) {
|
||||
data.push_back(Entry(i, 0.1f));
|
||||
}
|
||||
|
||||
SparsePage other;
|
||||
other.offset.HostVector() = offset;
|
||||
other.data.HostVector() = data;
|
||||
|
||||
page.PushCSC(other);
|
||||
|
||||
ASSERT_EQ(page.offset.HostVector().size(), offset.size());
|
||||
ASSERT_EQ(page.data.HostVector().size(), data.size());
|
||||
for (size_t i = 0; i < offset.size(); ++i) {
|
||||
ASSERT_EQ(page.offset.HostVector()[i], offset[i]);
|
||||
}
|
||||
for (size_t i = 0; i < data.size(); ++i) {
|
||||
ASSERT_EQ(page.data.HostVector()[i].index, data[i].index);
|
||||
}
|
||||
|
||||
page.PushCSC(other);
|
||||
ASSERT_EQ(page.offset.HostVector().size(), offset.size());
|
||||
ASSERT_EQ(page.data.Size(), data.size() * 2);
|
||||
|
||||
for (size_t i = 0; i < offset.size(); ++i) {
|
||||
ASSERT_EQ(page.offset.HostVector()[i], offset[i] * 2);
|
||||
}
|
||||
|
||||
auto inst = page[0];
|
||||
ASSERT_EQ(inst.size(), 2);
|
||||
for (auto entry : inst) {
|
||||
ASSERT_EQ(entry.index, 0);
|
||||
}
|
||||
|
||||
inst = page[1];
|
||||
ASSERT_EQ(inst.size(), 6);
|
||||
std::vector<size_t> indices_sol {1, 2, 3};
|
||||
for (size_t i = 0; i < inst.size(); ++i) {
|
||||
ASSERT_EQ(inst[i].index, indices_sol[i % 3]);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -3,6 +3,7 @@
|
||||
#include <vector>
|
||||
#include "helpers.h"
|
||||
#include "xgboost/learner.h"
|
||||
#include "dmlc/filesystem.h"
|
||||
|
||||
namespace xgboost {
|
||||
|
||||
@@ -92,4 +93,26 @@ TEST(Learner, CheckGroup) {
|
||||
delete pp_mat;
|
||||
}
|
||||
|
||||
// Loads a large libsvm file through an on-disk cache and runs one boosting
// iteration on it with the binary:logistic objective.
TEST(Learner, SLOW_CheckMultiBatch) {
  using Arg = std::pair<std::string, std::string>;

  // Create sufficiently large data to make two row pages
  dmlc::TemporaryDirectory scratch_dir;
  const std::string data_path = scratch_dir.path + "/big.libsvm";
  CreateBigTestData(data_path, 5000000);

  // "<file>#<file>.cache" is the URI handed to DMatrix::Load; the check below
  // expects a "<file>.cache.row.page" file to exist afterwards.
  const std::string cache_uri = data_path + "#" + data_path + ".cache";
  std::shared_ptr<DMatrix> p_dmat(xgboost::DMatrix::Load(cache_uri, true, false));
  EXPECT_TRUE(FileExists(data_path + ".cache.row.page"));
  EXPECT_FALSE(p_dmat->SingleColBlock());

  // Alternating 0/1 labels, one per row.
  const size_t n_rows = p_dmat->Info().num_row_;
  std::vector<bst_float> labels(n_rows);
  for (size_t row = 0; row < n_rows; ++row) {
    labels[row] = static_cast<bst_float>(row % 2);
  }
  p_dmat->Info().SetInfo("label", labels.data(), DataType::kFloat32, n_rows);

  std::vector<std::shared_ptr<DMatrix>> cache_mats{p_dmat};
  std::unique_ptr<Learner> learner{Learner::Create(cache_mats)};
  learner->Configure({Arg{"objective", "binary:logistic"}});
  learner->InitModel();
  learner->UpdateOneIter(0, p_dmat.get());
}
|
||||
|
||||
} // namespace xgboost
|
||||
|
||||
Reference in New Issue
Block a user