xgboost/src/data/sparse_page_raw_format.cc
Jiaming Yuan bc267dd729
Use ptr from mmap for GHistIndexMatrix and ColumnMatrix. (#9315)
* Use ptr from mmap for `GHistIndexMatrix` and `ColumnMatrix`.

- Define a resource for holding various types of memory pointers.
- Define ref vector for holding resources.
- Swap the underlying resources for GHist and ColumnM.
- Add documentation for current status.
- s390x support is removed. It should still work if you can compile XGBoost; all the old workaround code did was get GCC to compile.
2023-06-27 19:05:46 +08:00

76 lines
2.1 KiB
C++

/**
* Copyright 2015-2023, XGBoost Contributors
* \file sparse_page_raw_format.cc
* Raw binary format of sparse page.
*/
#include <dmlc/registry.h>
#include "../common/io.h" // for AlignedResourceReadStream, AlignedFileWriteStream
#include "../common/ref_resource_view.h" // for WriteVec
#include "./sparse_page_writer.h"
#include "xgboost/data.h"
#include "xgboost/logging.h"
namespace xgboost::data {
// Tag this translation unit for the dmlc registry. NOTE(review): presumably paired with a
// DMLC_REGISTRY_LINK_TAG(sparse_page_raw_format) elsewhere so the registrations in this file
// are not dropped by the linker -- confirm against dmlc/registry.h.
DMLC_REGISTRY_FILE_TAG(sparse_page_raw_format);
/**
 * \brief Raw binary reader/writer for sparse pages.
 *
 * Layout of one page, in the order produced by Write() and consumed by Read():
 *   1. the offset vector (common::WriteVec / common::ReadVec),
 *   2. the data vector, present only when it is non-empty,
 *   3. the raw bytes of `base_rowid`.
 *
 * \tparam T Page type. It must expose `offset` and `data` members (each providing
 *           HostVector() and Size()) and a `base_rowid` member.
 */
template <typename T>
class SparsePageRawFormat : public SparsePageFormat<T> {
 public:
  /**
   * \brief Load one page from \p fi into \p page.
   *
   * \param page Destination page; its offset, data and base_rowid are overwritten.
   * \param fi   Input stream positioned at the start of a page.
   *
   * \return false when any of the three sections cannot be read completely, true otherwise.
   *         An empty offset vector trips a CHECK instead of returning false.
   */
  bool Read(T* page, common::AlignedResourceReadStream* fi) override {
    auto& offset_vec = page->offset.HostVector();
    if (!common::ReadVec(fi, &offset_vec)) {
      return false;
    }
    auto& data_vec = page->data.HostVector();
    // A valid page always has at least one offset entry; back() below relies on it.
    CHECK_NE(page->offset.Size(), 0U) << "Invalid SparsePage file";
    // The last offset is the total number of entries in the page.
    data_vec.resize(offset_vec.back());
    // Write() skips the data section for empty pages, so only read it when expected.
    if (page->data.Size() != 0) {
      if (!common::ReadVec(fi, &data_vec)) {
        return false;
      }
    }
    // Reject short reads as well as empty ones: a truncated file could otherwise leave
    // base_rowid only partially filled. Read() is expected to report the number of bytes
    // copied, mirroring Write() whose result is accumulated as a byte count below.
    if (fi->Read(&page->base_rowid, sizeof(page->base_rowid)) < sizeof(page->base_rowid)) {
      return false;
    }
    return true;
  }

  /**
   * \brief Serialize \p page to \p fo.
   *
   * \param page Page to write. Its offset vector must be non-empty and start at 0, and
   *             its last offset must equal the number of data entries (both CHECK-ed).
   * \param fo   Output stream.
   *
   * \return Sum of the byte counts reported by the individual write calls.
   */
  std::size_t Write(const T& page, common::AlignedFileWriteStream* fo) override {
    const auto& offset_vec = page.offset.HostVector();
    const auto& data_vec = page.data.HostVector();
    CHECK(page.offset.Size() != 0 && offset_vec[0] == 0);
    CHECK_EQ(offset_vec.back(), page.data.Size());
    std::size_t bytes{0};
    bytes += common::WriteVec(fo, offset_vec);
    // Empty pages carry no data section; Read() mirrors this condition.
    if (page.data.Size() != 0) {
      bytes += common::WriteVec(fo, data_vec);
    }
    bytes += fo->Write(&page.base_rowid, sizeof(page.base_rowid));
    return bytes;
  }
};
// Register a factory named `raw` that creates a SparsePageRawFormat for SparsePage.
// The factory hands back a raw pointer; presumably the caller takes ownership.
XGBOOST_REGISTER_SPARSE_PAGE_FORMAT(raw)
    .describe("Raw binary data format.")
    .set_body([] { return new SparsePageRawFormat<SparsePage>(); });
// Register a factory named `raw` that creates a SparsePageRawFormat for CSCPage.
// The factory hands back a raw pointer; presumably the caller takes ownership.
XGBOOST_REGISTER_CSC_PAGE_FORMAT(raw)
    .describe("Raw binary data format.")
    .set_body([] { return new SparsePageRawFormat<CSCPage>(); });
// Register a factory named `raw` that creates a SparsePageRawFormat for SortedCSCPage.
// The factory hands back a raw pointer; presumably the caller takes ownership.
XGBOOST_REGISTER_SORTED_CSC_PAGE_FORMAT(raw)
    .describe("Raw binary data format.")
    .set_body([] { return new SparsePageRawFormat<SortedCSCPage>(); });
} // namespace xgboost::data