[DISK] Add shard option to disk
This commit is contained in:
@@ -14,6 +14,7 @@
|
||||
#include <algorithm>
|
||||
#include <string>
|
||||
#include "./sparse_batch_page.h"
|
||||
#include "../common/common.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace data {
|
||||
@@ -21,9 +22,9 @@ namespace data {
|
||||
class SparsePageDMatrix : public DMatrix {
|
||||
public:
|
||||
explicit SparsePageDMatrix(std::unique_ptr<DataSource>&& source,
|
||||
const std::string& cache_prefix)
|
||||
: source_(std::move(source)),
|
||||
cache_prefix_(cache_prefix) {}
|
||||
const std::string& cache_info)
|
||||
: source_(std::move(source)), cache_info_(cache_info) {
|
||||
}
|
||||
|
||||
MetaInfo& info() override {
|
||||
return source_->info;
|
||||
@@ -77,11 +78,9 @@ class SparsePageDMatrix : public DMatrix {
|
||||
// declare the column batch iter.
|
||||
class ColPageIter : public dmlc::DataIter<ColBatch> {
|
||||
public:
|
||||
explicit ColPageIter(std::unique_ptr<dmlc::SeekStream>&& fi);
|
||||
explicit ColPageIter(std::vector<std::unique_ptr<dmlc::SeekStream> >&& files);
|
||||
virtual ~ColPageIter();
|
||||
void BeforeFirst() override {
|
||||
prefetcher_.BeforeFirst();
|
||||
}
|
||||
void BeforeFirst() override;
|
||||
const ColBatch &Value() const override {
|
||||
return out_;
|
||||
}
|
||||
@@ -90,20 +89,22 @@ class SparsePageDMatrix : public DMatrix {
|
||||
void Init(const std::vector<bst_uint>& index_set, bool load_all);
|
||||
|
||||
private:
|
||||
// data file pointer.
|
||||
std::unique_ptr<dmlc::SeekStream> fi_;
|
||||
// the temp page.
|
||||
SparsePage* page_;
|
||||
// internal clock ptr.
|
||||
size_t clock_ptr_;
|
||||
// data file pointer.
|
||||
std::vector<std::unique_ptr<dmlc::SeekStream> > files_;
|
||||
// page format.
|
||||
std::unique_ptr<SparsePage::Format> format_;
|
||||
std::vector<std::unique_ptr<SparsePage::Format> > formats_;
|
||||
/*! \brief internal prefetcher. */
|
||||
std::vector<std::unique_ptr<dmlc::ThreadedIter<SparsePage> > > prefetchers_;
|
||||
// The index set to be loaded.
|
||||
std::vector<bst_uint> index_set_;
|
||||
// The index set by the outsiders
|
||||
std::vector<bst_uint> set_index_set_;
|
||||
// whether to load data dataset.
|
||||
bool set_load_all_, load_all_;
|
||||
// data prefetcher.
|
||||
dmlc::ThreadedIter<SparsePage> prefetcher_;
|
||||
// temporal space for batch
|
||||
ColBatch out_;
|
||||
// the pointer data.
|
||||
@@ -117,7 +118,7 @@ class SparsePageDMatrix : public DMatrix {
|
||||
// source data pointer.
|
||||
std::unique_ptr<DataSource> source_;
|
||||
// the cache prefix
|
||||
std::string cache_prefix_;
|
||||
std::string cache_info_;
|
||||
/*! \brief list of row index that are buffered */
|
||||
std::vector<bst_uint> buffered_rowset_;
|
||||
// count for column data
|
||||
|
||||
Reference in New Issue
Block a user