Write ELLPACK pages to disk (#4879)
* add ellpack source
* add batch param
* extract function to parse cache info
* construct ellpack info separately
* push batch to ellpack page
* write ellpack page
* make sparse page source reusable
This commit is contained in:
@@ -12,9 +12,10 @@ namespace data {
|
||||
|
||||
DMLC_REGISTRY_FILE_TAG(sparse_page_raw_format);
|
||||
|
||||
class SparsePageRawFormat : public SparsePageFormat {
|
||||
template<typename T>
|
||||
class SparsePageRawFormat : public SparsePageFormat<T> {
|
||||
public:
|
||||
bool Read(SparsePage* page, dmlc::SeekStream* fi) override {
|
||||
bool Read(T* page, dmlc::SeekStream* fi) override {
|
||||
auto& offset_vec = page->offset.HostVector();
|
||||
if (!fi->Read(&offset_vec)) return false;
|
||||
auto& data_vec = page->data.HostVector();
|
||||
@@ -29,7 +30,7 @@ class SparsePageRawFormat : public SparsePageFormat {
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Read(SparsePage* page,
|
||||
bool Read(T* page,
|
||||
dmlc::SeekStream* fi,
|
||||
const std::vector<bst_uint>& sorted_index_set) override {
|
||||
if (!fi->Read(&disk_offset_)) return false;
|
||||
@@ -79,7 +80,7 @@ class SparsePageRawFormat : public SparsePageFormat {
|
||||
return true;
|
||||
}
|
||||
|
||||
void Write(const SparsePage& page, dmlc::Stream* fo) override {
|
||||
void Write(const T& page, dmlc::Stream* fo) override {
|
||||
const auto& offset_vec = page.offset.HostVector();
|
||||
const auto& data_vec = page.data.HostVector();
|
||||
CHECK(page.offset.Size() != 0 && offset_vec[0] == 0);
|
||||
@@ -98,7 +99,20 @@ class SparsePageRawFormat : public SparsePageFormat {
|
||||
XGBOOST_REGISTER_SPARSE_PAGE_FORMAT(raw)
|
||||
.describe("Raw binary data format.")
|
||||
.set_body([]() {
|
||||
return new SparsePageRawFormat();
|
||||
return new SparsePageRawFormat<SparsePage>();
|
||||
});
|
||||
|
||||
XGBOOST_REGISTER_CSC_PAGE_FORMAT(raw)
|
||||
.describe("Raw binary data format.")
|
||||
.set_body([]() {
|
||||
return new SparsePageRawFormat<CSCPage>();
|
||||
});
|
||||
|
||||
XGBOOST_REGISTER_SORTED_CSC_PAGE_FORMAT(raw)
|
||||
.describe("Raw binary data format.")
|
||||
.set_body([]() {
|
||||
return new SparsePageRawFormat<SortedCSCPage>();
|
||||
});
|
||||
|
||||
} // namespace data
|
||||
} // namespace xgboost
|
||||
|
||||
Reference in New Issue
Block a user