xgboost/tests/cpp/data/test_file_iterator.cc
Jiaming Yuan 1f9a57d17b
[Breaking] Require format to be specified in input URI. (#9077)
Previously, we used `libsvm` as the default when the format was not specified. However, the
dmlc data parser is not particularly robust against errors, and the most common type of
error is an unspecified format.

Along with this change, we will recommend that users use other data loaders instead. We
will continue to maintain the parsers, as they are currently used for many internal tests,
including federated learning.
2023-04-28 19:45:15 +08:00

46 lines
1.2 KiB
C++

/**
* Copyright 2021-2023 XGBoost contributors
*/
#include <gtest/gtest.h>
#include <algorithm> // for max
#include <any> // for any_cast
#include <cstddef> // for size_t
#include <memory>
#include "../../../src/data/adapter.h"
#include "../../../src/data/file_iterator.h"
#include "../../../src/data/proxy_dmatrix.h"
#include "../filesystem.h" // dmlc::TemporaryDirectory
#include "../helpers.h"
namespace xgboost::data {
/**
 * Checks that a FileIterator over a generated libsvm text file reports the expected number
 * of columns, once for a file written with each of the two indexing conventions.
 *
 * For each case the test writes a data file into a temporary directory, opens it through a
 * URI that carries an explicit `indexing_mode` and `format=libsvm`, walks all batches and
 * asserts on the widest batch seen.
 */
TEST(FileIterator, Basic) {
  // Walks every batch produced by `iter` and asserts that the largest column count
  // reported by the CSR adapter behind the proxy equals the expected feature count.
  auto check_n_features = [](FileIterator *iter) {
    // Typed as size_t so the comparison below does not mix signed and unsigned operands.
    constexpr std::size_t kExpectedFeatures = 5;

    std::size_t n_features = 0;
    iter->Reset();
    while (iter->Next()) {
      auto proxy = MakeProxy(iter->Proxy());
      auto csr = std::any_cast<std::shared_ptr<CSRArrayAdapter>>(proxy->Adapter());
      n_features = std::max(n_features, csr->NumColumns());
    }
    ASSERT_EQ(n_features, kExpectedFeatures);
  };

  dmlc::TemporaryDirectory tmpdir;

  // Runs one scenario: generate the file, append the URI query string, iterate and check.
  //   file_name  - name of the data file created inside the temporary directory.
  //   zero_based - forwarded to CreateBigTestData; presumably selects 0-based feature
  //                indices in the generated file (matches the file names) - TODO confirm.
  //   uri_args   - query string appended to the path before it is handed to FileIterator.
  auto run_case = [&](char const *file_name, bool zero_based, char const *uri_args) {
    auto path = tmpdir.path + "/" + file_name;
    CreateBigTestData(path, 3 * 64, zero_based);
    path += uri_args;
    // NOTE(review): `0, 1` look like (partition index, number of partitions) - confirm
    // against the FileIterator constructor.
    FileIterator iter{path, 0, 1};
    check_n_features(&iter);
  };

  run_case("0-based.svm", true, "?indexing_mode=0&format=libsvm");
  run_case("1-based.svm", false, "?indexing_mode=1&format=libsvm");
}
} // namespace xgboost::data