Remove SimpleCSRSource (#5315)

This commit is contained in:
Rory Mitchell
2020-02-18 16:49:17 +13:00
committed by GitHub
parent 9f77c18b0d
commit b2b2c4e231
18 changed files with 121 additions and 286 deletions

View File

@@ -240,7 +240,7 @@ TEST(hist_util, DenseCutsCategorical) {
auto dmat = GetDMatrixFromData(x, n, 1);
HistogramCuts cuts;
DenseCuts dense(&cuts);
dense.Build(&dmat, num_bins);
dense.Build(dmat.get(), num_bins);
auto cuts_from_sketch = cuts.Values();
EXPECT_LT(cuts.MinValues()[0], x_sorted.front());
EXPECT_GT(cuts_from_sketch.front(), x_sorted.front());
@@ -260,7 +260,7 @@ TEST(hist_util, DenseCutsAccuracyTest) {
for (auto num_bins : bin_sizes) {
HistogramCuts cuts;
DenseCuts dense(&cuts);
dense.Build(&dmat, num_bins);
dense.Build(dmat.get(), num_bins);
ValidateCuts(cuts, x, num_rows, num_columns, num_bins);
}
}
@@ -294,7 +294,7 @@ TEST(hist_util, SparseCutsAccuracyTest) {
for (auto num_bins : bin_sizes) {
HistogramCuts cuts;
SparseCuts sparse(&cuts);
sparse.Build(&dmat, num_bins);
sparse.Build(dmat.get(), num_bins);
ValidateCuts(cuts, x, num_rows, num_columns, num_bins);
}
}
@@ -312,7 +312,7 @@ TEST(hist_util, SparseCutsCategorical) {
auto dmat = GetDMatrixFromData(x, n, 1);
HistogramCuts cuts;
SparseCuts sparse(&cuts);
sparse.Build(&dmat, num_bins);
sparse.Build(dmat.get(), num_bins);
auto cuts_from_sketch = cuts.Values();
EXPECT_LT(cuts.MinValues()[0], x_sorted.front());
EXPECT_GT(cuts_from_sketch.front(), x_sorted.front());

View File

@@ -7,6 +7,7 @@
#include <fstream>
#include "../../../src/common/hist_util.h"
#include "../../../src/data/simple_dmatrix.h"
#include "../../../src/data/adapter.h"
// Some helper functions used to test both GPU and CPU algorithms
//
@@ -40,10 +41,11 @@ inline std::vector<float> GenerateRandomCategoricalSingleColumn(int n,
return x;
}
inline data::SimpleDMatrix GetDMatrixFromData(const std::vector<float>& x, int num_rows, int num_columns) {
inline std::shared_ptr<data::SimpleDMatrix> GetDMatrixFromData(const std::vector<float>& x, int num_rows, int num_columns) {
data::DenseAdapter adapter(x.data(), num_rows, num_columns);
return data::SimpleDMatrix(&adapter, std::numeric_limits<float>::quiet_NaN(),
1);
return std::shared_ptr<data::SimpleDMatrix>(new data::SimpleDMatrix(
&adapter, std::numeric_limits<float>::quiet_NaN(),
1));
}
inline std::shared_ptr<DMatrix> GetExternalMemoryDMatrixFromData(

View File

@@ -7,7 +7,6 @@
#include <memory>
#include "../../../src/common/bitfield.h"
#include "../../../src/common/device_helpers.cuh"
#include "../../../src/data/simple_csr_source.h"
namespace xgboost {

View File

@@ -4,7 +4,6 @@
#include <xgboost/data.h>
#include <string>
#include <memory>
#include "../../../src/data/simple_csr_source.h"
#include "../../../src/common/version.h"
#include "../helpers.h"

View File

@@ -1,41 +0,0 @@
// Copyright by Contributors
#include <gtest/gtest.h>
#include <dmlc/filesystem.h>
#include <xgboost/data.h>
#include <xgboost/json.h>
#include "../../../src/data/simple_csr_source.h"
#include "../helpers.h"
namespace xgboost {
// Round-trip check for the binary DMatrix format: load a small libsvm file,
// save it with SaveToLocalFile, load the binary file back, and compare the
// meta info and one entry of the first row between the two matrices.
TEST(SimpleCSRSource, SaveLoadBinary) {
  dmlc::TemporaryDirectory tempdir;
  const std::string tmp_file = tempdir.path + "/simple.libsvm";
  CreateSimpleTestData(tmp_file);

  // Own both matrices through unique_ptr so an early ASSERT_* return (or an
  // exception) cannot leak them.
  std::unique_ptr<xgboost::DMatrix> dmat(
      xgboost::DMatrix::Load(tmp_file, true, false));
  ASSERT_TRUE(dmat != nullptr);

  const std::string tmp_binfile = tempdir.path + "/csr_source.binary";
  dmat->SaveToLocalFile(tmp_binfile);
  std::unique_ptr<xgboost::DMatrix> dmat_read(
      xgboost::DMatrix::Load(tmp_binfile, true, false));
  ASSERT_TRUE(dmat_read != nullptr);

  EXPECT_EQ(dmat->Info().num_col_, dmat_read->Info().num_col_);
  EXPECT_EQ(dmat->Info().num_row_, dmat_read->Info().num_row_);
  // The original repeated the num_row_ comparison verbatim; compare the
  // non-zero count instead so the third check covers something new.
  EXPECT_EQ(dmat->Info().num_nonzero_, dmat_read->Info().num_nonzero_);

  // Both matrices must expose at least one batch before we dereference.
  auto row_iter = dmat->GetBatches<xgboost::SparsePage>().begin();
  auto row_iter_read = dmat_read->GetBatches<xgboost::SparsePage>().begin();
  ASSERT_FALSE(row_iter.AtEnd());
  ASSERT_FALSE(row_iter_read.AtEnd());

  // Compare the first row of the original against the re-loaded copy.
  auto first_row = (*row_iter)[0];
  auto first_row_read = (*row_iter_read)[0];
  ASSERT_EQ(first_row.size(), first_row_read.size());
  // Entry [2] is inspected below, so the row needs at least three entries.
  ASSERT_TRUE(first_row.size() > 2);
  EXPECT_EQ(first_row[2].index, first_row_read[2].index);
  EXPECT_EQ(first_row[2].fvalue, first_row_read[2].fvalue);
}
}  // namespace xgboost

View File

@@ -254,3 +254,33 @@ TEST(SimpleDMatrix, Slice) {
delete pp_dmat;
};
// Round-trip check for the binary DMatrix format: load a small libsvm file,
// save it through SimpleDMatrix::SaveToLocalFile, load the binary file back,
// and compare the meta info and one entry of the first row.
TEST(SimpleDMatrix, SaveLoadBinary) {
  dmlc::TemporaryDirectory tempdir;
  const std::string tmp_file = tempdir.path + "/simple.libsvm";
  CreateSimpleTestData(tmp_file);

  // Own both matrices through unique_ptr so an early ASSERT_* return (or an
  // exception) cannot leak them.
  std::unique_ptr<xgboost::DMatrix> dmat(
      xgboost::DMatrix::Load(tmp_file, true, false));
  ASSERT_TRUE(dmat != nullptr);

  // The save call below goes through the SimpleDMatrix interface. Fail the
  // test cleanly, rather than dereferencing null, if Load returned some other
  // DMatrix implementation.
  auto* simple_dmat = dynamic_cast<data::SimpleDMatrix*>(dmat.get());
  ASSERT_TRUE(simple_dmat != nullptr);

  const std::string tmp_binfile = tempdir.path + "/csr_source.binary";
  simple_dmat->SaveToLocalFile(tmp_binfile);
  std::unique_ptr<xgboost::DMatrix> dmat_read(
      xgboost::DMatrix::Load(tmp_binfile, true, false));
  ASSERT_TRUE(dmat_read != nullptr);

  EXPECT_EQ(dmat->Info().num_col_, dmat_read->Info().num_col_);
  EXPECT_EQ(dmat->Info().num_row_, dmat_read->Info().num_row_);
  // The original repeated the num_row_ comparison verbatim; compare the
  // non-zero count instead so the third check covers something new.
  EXPECT_EQ(dmat->Info().num_nonzero_, dmat_read->Info().num_nonzero_);

  // Both matrices must expose at least one batch before we dereference.
  auto row_iter = dmat->GetBatches<xgboost::SparsePage>().begin();
  auto row_iter_read = dmat_read->GetBatches<xgboost::SparsePage>().begin();
  ASSERT_FALSE(row_iter.AtEnd());
  ASSERT_FALSE(row_iter_read.AtEnd());

  // Compare the first row of the original against the re-loaded copy.
  auto first_row = (*row_iter)[0];
  auto first_row_read = (*row_iter_read)[0];
  ASSERT_EQ(first_row.size(), first_row_read.size());
  // Entry [2] is inspected below, so the row needs at least three entries.
  ASSERT_TRUE(first_row.size() > 2);
  EXPECT_EQ(first_row[2].index, first_row_read[2].index);
  EXPECT_EQ(first_row[2].fvalue, first_row_read[2].fvalue);
}

View File

@@ -17,7 +17,6 @@
#include "helpers.h"
#include "xgboost/c_api.h"
#include "../../src/data/simple_csr_source.h"
#include "../../src/gbm/gbtree_model.h"
#include "xgboost/predictor.h"
@@ -256,17 +255,13 @@ std::unique_ptr<DMatrix> CreateSparsePageDMatrixWithRC(
}
fo.close();
std::unique_ptr<DMatrix> dmat(DMatrix::Load(
tmp_file + "#" + tmp_file + ".cache", true, false, "auto", page_size));
EXPECT_TRUE(FileExists(tmp_file + ".cache.row.page"));
if (!page_size) {
std::unique_ptr<data::SimpleCSRSource> source(new data::SimpleCSRSource);
source->CopyFrom(dmat.get());
return std::unique_ptr<DMatrix>(DMatrix::Create(std::move(source)));
} else {
return dmat;
std::string uri = tmp_file;
if (page_size > 0) {
uri += "#" + tmp_file + ".cache";
}
std::unique_ptr<DMatrix> dmat(
DMatrix::Load(uri, true, false, "auto", page_size));
return dmat;
}
gbm::GBTreeModel CreateTestModel(LearnerModelParam const* param, size_t n_classes) {