xgboost/src/data/simple_dmatrix.h
Jiaming Yuan bd1f3a38f0
Rewrite sparse dmatrix using callbacks. (#7092)
- Reduce dependency on dmlc parsers and provide an interface for users to load data by themselves.
- Remove use of threaded iterator and IO queue.
- Remove `page_size`.
- Make sure the number of pages in memory is bounded.
- Make sure the cache can not be violated.
- Provide an interface for internal algorithms to process data asynchronously.
2021-07-16 12:33:31 +08:00

68 lines
1.9 KiB
C++

/*!
* Copyright 2015-2021 by Contributors
* \file simple_dmatrix.h
* \brief In-memory version of DMatrix.
* \author Tianqi Chen
*/
#ifndef XGBOOST_DATA_SIMPLE_DMATRIX_H_
#define XGBOOST_DATA_SIMPLE_DMATRIX_H_
#include <xgboost/base.h>
#include <xgboost/data.h>
#include <memory>
#include <string>
#include "gradient_index.h"
namespace xgboost {
namespace data {
// Used for single batch data.
class SimpleDMatrix : public DMatrix {
public:
SimpleDMatrix() = default;
template <typename AdapterT>
explicit SimpleDMatrix(AdapterT* adapter, float missing, int nthread);
explicit SimpleDMatrix(dmlc::Stream* in_stream);
~SimpleDMatrix() override = default;
void SaveToLocalFile(const std::string& fname);
MetaInfo& Info() override;
const MetaInfo& Info() const override;
bool SingleColBlock() const override { return true; }
DMatrix* Slice(common::Span<int32_t const> ridxs) override;
/*! \brief magic number used to identify SimpleDMatrix binary files */
static const int kMagic = 0xffffab01;
private:
BatchSet<SparsePage> GetRowBatches() override;
BatchSet<CSCPage> GetColumnBatches() override;
BatchSet<SortedCSCPage> GetSortedColumnBatches() override;
BatchSet<EllpackPage> GetEllpackBatches(const BatchParam& param) override;
BatchSet<GHistIndexMatrix> GetGradientIndex(const BatchParam& param) override;
MetaInfo info_;
// Primary storage type
std::shared_ptr<SparsePage> sparse_page_ = std::make_shared<SparsePage>();
std::shared_ptr<CSCPage> column_page_;
std::shared_ptr<SortedCSCPage> sorted_column_page_;
std::shared_ptr<EllpackPage> ellpack_page_;
std::shared_ptr<GHistIndexMatrix> gradient_index_;
BatchParam batch_param_;
bool EllpackExists() const override {
return static_cast<bool>(ellpack_page_);
}
bool SparsePageExists() const override {
return true;
}
};
} // namespace data
} // namespace xgboost
#endif // XGBOOST_DATA_SIMPLE_DMATRIX_H_