Write ELLPACK pages to disk (#4879)

* add ELLPACK source
* add batch param
* extract function to parse cache info
* construct ELLPACK info separately
* push batch to ELLPACK page
* write ELLPACK page
* make sparse page source reusable
This commit is contained in:
Rong Ou
2019-10-22 20:44:32 -07:00
committed by Jiaming Yuan
parent 310fe60b35
commit 5b1715d97c
25 changed files with 935 additions and 408 deletions

View File

@@ -12,9 +12,10 @@ namespace data {
DMLC_REGISTRY_FILE_TAG(sparse_page_raw_format);
class SparsePageRawFormat : public SparsePageFormat {
template<typename T>
class SparsePageRawFormat : public SparsePageFormat<T> {
public:
bool Read(SparsePage* page, dmlc::SeekStream* fi) override {
bool Read(T* page, dmlc::SeekStream* fi) override {
auto& offset_vec = page->offset.HostVector();
if (!fi->Read(&offset_vec)) return false;
auto& data_vec = page->data.HostVector();
@@ -29,7 +30,7 @@ class SparsePageRawFormat : public SparsePageFormat {
return true;
}
bool Read(SparsePage* page,
bool Read(T* page,
dmlc::SeekStream* fi,
const std::vector<bst_uint>& sorted_index_set) override {
if (!fi->Read(&disk_offset_)) return false;
@@ -79,7 +80,7 @@ class SparsePageRawFormat : public SparsePageFormat {
return true;
}
void Write(const SparsePage& page, dmlc::Stream* fo) override {
void Write(const T& page, dmlc::Stream* fo) override {
const auto& offset_vec = page.offset.HostVector();
const auto& data_vec = page.data.HostVector();
CHECK(page.offset.Size() != 0 && offset_vec[0] == 0);
@@ -98,7 +99,20 @@ class SparsePageRawFormat : public SparsePageFormat {
// Invokes XGBOOST_REGISTER_SPARSE_PAGE_FORMAT for the name `raw`, attaching a
// human-readable description and a factory lambda that returns a newly
// allocated SparsePageRawFormat object.
// NOTE(review): two `return` statements appear in the lambda below; this looks
// like the before/after lines of a diff rendered together, in which case only
// the `SparsePageRawFormat<SparsePage>` form is the current code -- confirm
// against the real file.
// NOTE(review): the factory hands back a raw `new`-ed pointer; ownership is
// presumably taken by whoever calls the registered body -- TODO confirm.
XGBOOST_REGISTER_SPARSE_PAGE_FORMAT(raw)
.describe("Raw binary data format.")
.set_body([]() {
return new SparsePageRawFormat();
return new SparsePageRawFormat<SparsePage>();
});
// Invokes XGBOOST_REGISTER_CSC_PAGE_FORMAT for the name `raw`, attaching a
// human-readable description and a factory lambda. The factory returns a newly
// allocated SparsePageRawFormat instantiated for CSCPage.
// NOTE(review): the pointer comes from a bare `new`; ownership is presumably
// taken by whoever calls the registered body -- TODO confirm.
XGBOOST_REGISTER_CSC_PAGE_FORMAT(raw)
.describe("Raw binary data format.")
.set_body([]() {
return new SparsePageRawFormat<CSCPage>();
});
// Invokes XGBOOST_REGISTER_SORTED_CSC_PAGE_FORMAT for the name `raw`, attaching
// a human-readable description and a factory lambda. The factory returns a
// newly allocated SparsePageRawFormat instantiated for SortedCSCPage.
// NOTE(review): the pointer comes from a bare `new`; ownership is presumably
// taken by whoever calls the registered body -- TODO confirm.
XGBOOST_REGISTER_SORTED_CSC_PAGE_FORMAT(raw)
.describe("Raw binary data format.")
.set_body([]() {
return new SparsePageRawFormat<SortedCSCPage>();
});
} // namespace data
} // namespace xgboost