Update dmlc-core and use data iter for GPU sampling tests. (#7398)

* Update dmlc-core.
* New parquet parser in dmlc-core.
* Use data iter for GPU sampling tests.
This commit is contained in:
Jiaming Yuan
2021-11-06 05:12:49 +08:00
committed by GitHub
parent c968217ca8
commit 6ede12412c
6 changed files with 61 additions and 18 deletions

View File

@@ -22,8 +22,8 @@ void VerifySampling(size_t page_size,
size_t sample_rows = kRows * subsample;
dmlc::TemporaryDirectory tmpdir;
std::unique_ptr<DMatrix> dmat(
CreateSparsePageDMatrixWithRC(kRows, kCols, page_size, true, tmpdir));
std::unique_ptr<DMatrix> dmat(CreateSparsePageDMatrix(
kRows, kCols, kRows / (page_size == 0 ? kRows : page_size), tmpdir.path + "/cache"));
auto gpair = GenerateRandomGradients(kRows);
GradientPair sum_gpair{};
for (const auto& gp : gpair.ConstHostVector()) {
@@ -81,8 +81,8 @@ TEST(GradientBasedSampler, NoSamplingExternalMemory) {
// Create a DMatrix with multiple batches.
dmlc::TemporaryDirectory tmpdir;
std::unique_ptr<DMatrix>
dmat(CreateSparsePageDMatrixWithRC(kRows, kCols, kPageSize, true, tmpdir));
std::unique_ptr<DMatrix> dmat(
CreateSparsePageDMatrix(kRows, kCols, kRows / kPageSize, tmpdir.path + "/cache"));
auto gpair = GenerateRandomGradients(kRows);
gpair.SetDevice(0);

View File

@@ -468,13 +468,14 @@ TEST(GpuHist, ExternalMemory) {
constexpr size_t kCols = 2;
constexpr size_t kPageSize = 1024;
// Create an in-memory DMatrix.
std::unique_ptr<DMatrix> dmat(CreateSparsePageDMatrixWithRC(kRows, kCols, 0, true));
dmlc::TemporaryDirectory tmpdir;
// Create a DMatrix with multiple batches.
dmlc::TemporaryDirectory tmpdir;
std::unique_ptr<DMatrix>
dmat_ext(CreateSparsePageDMatrixWithRC(kRows, kCols, kPageSize, true, tmpdir));
std::unique_ptr<DMatrix> dmat_ext(
CreateSparsePageDMatrix(kRows, kCols, kRows / kPageSize, tmpdir.path + "/cache"));
// Create a single batch DMatrix.
std::unique_ptr<DMatrix> dmat(CreateSparsePageDMatrix(kRows, kCols, 1, tmpdir.path + "/cache"));
auto gpair = GenerateRandomGradients(kRows);
@@ -503,13 +504,14 @@ TEST(GpuHist, ExternalMemoryWithSampling) {
const std::string kSamplingMethod = "gradient_based";
common::GlobalRandom().seed(0);
// Create an in-memory DMatrix.
std::unique_ptr<DMatrix> dmat(CreateSparsePageDMatrixWithRC(kRows, kCols, 0, true));
dmlc::TemporaryDirectory tmpdir;
// Create a single batch DMatrix.
std::unique_ptr<DMatrix> dmat(CreateSparsePageDMatrix(kRows, kCols, 1, tmpdir.path + "/cache"));
// Create a DMatrix with multiple batches.
dmlc::TemporaryDirectory tmpdir;
std::unique_ptr<DMatrix>
dmat_ext(CreateSparsePageDMatrixWithRC(kRows, kCols, kPageSize, true, tmpdir));
std::unique_ptr<DMatrix> dmat_ext(
CreateSparsePageDMatrix(kRows, kCols, kRows / kPageSize, tmpdir.path + "/cache"));
auto gpair = GenerateRandomGradients(kRows);