Use DataSplitMode to configure data loading (#8434)

* Use `DataSplitMode` to configure data loading
This commit is contained in:
Rong Ou
2022-11-08 00:21:50 -08:00
committed by GitHub
parent 0d3da9869c
commit 8e76f5f595
13 changed files with 46 additions and 40 deletions

View File

@@ -40,6 +40,10 @@ enum class DataType : uint8_t {
/*! \brief Type of a feature: numerical or categorical. Stored in a single byte. */
enum class FeatureType : uint8_t {
  kNumerical = 0,
  kCategorical = 1
};
/*!
 * \brief Mode used to read in part of the data, divided among the workers
 *        in distributed mode.
 */
enum class DataSplitMode : int {
  kAuto = 0,
  kCol = 1,
  kRow = 2,
  kNone = 3
};
/*!
* \brief Meta information about the dataset; always sits in memory.
*/
@@ -537,7 +541,7 @@ class DMatrix {
* \brief Load DMatrix from URI.
* \param uri The URI of input.
* \param silent Whether print information during loading.
* \param load_row_split Flag to read in part of rows, divided among the workers in distributed mode.
* \param data_split_mode Mode to read in part of the data, divided among the workers in distributed mode.
* \param file_format The format type of the file, used for dmlc::Parser::Create.
* By default "auto" will be able to load in both local binary file.
* \param page_size Page size for external memory.
@@ -545,7 +549,7 @@ class DMatrix {
*/
static DMatrix* Load(const std::string& uri,
bool silent,
bool load_row_split,
DataSplitMode data_split_mode,
const std::string& file_format = "auto");
/**
@@ -678,6 +682,8 @@ inline BatchSet<ExtSparsePage> DMatrix::GetBatches() {
}
} // namespace xgboost
// NOTE(review): presumably registers xgboost::DataSplitMode as an enum-class
// parameter field type; the macro is defined outside this view -- confirm there,
// including any requirement it places on the enum's underlying type.
DECLARE_FIELD_ENUM_CLASS(xgboost::DataSplitMode);
namespace dmlc {
DMLC_DECLARE_TRAITS(is_pod, xgboost::Entry, true);