refactor tests to get rid of duplication (#4358)

* refactor tests to get rid of duplication

* address review comments
This commit is contained in:
Rong Ou 2019-04-12 00:21:48 -07:00 committed by Philip Hyunsu Cho
parent 3078b5944d
commit f4521bf6aa
5 changed files with 50 additions and 67 deletions

View File

@ -26,23 +26,7 @@ TEST(SparsePageDMatrix, MetaInfo) {
}
TEST(SparsePageDMatrix, RowAccess) {
// Create sufficiently large data to make two row pages
dmlc::TemporaryDirectory tempdir;
const std::string tmp_file = tempdir.path + "/big.libsvm";
CreateBigTestData(tmp_file, 12);
xgboost::DMatrix * dmat = xgboost::DMatrix::Load(
tmp_file + "#" + tmp_file + ".cache", true, false, "auto", 64UL);
EXPECT_TRUE(FileExists(tmp_file + ".cache.row.page"));
// Loop over the batches and count the records
int64_t batch_count = 0;
int64_t row_count = 0;
for (const auto &batch : dmat->GetRowBatches()) {
batch_count++;
row_count += batch.Size();
}
EXPECT_EQ(batch_count, 2);
EXPECT_EQ(row_count, dmat->Info().num_row_);
std::unique_ptr<xgboost::DMatrix> dmat = xgboost::CreateSparsePageDMatrix();
// Test the data read into the first row
auto &batch = *dmat->GetRowBatches().begin();
@ -50,8 +34,6 @@ TEST(SparsePageDMatrix, RowAccess) {
ASSERT_EQ(first_row.size(), 3);
EXPECT_EQ(first_row[2].index, 2);
EXPECT_EQ(first_row[2].fvalue, 20);
delete dmat;
}
TEST(SparsePageDMatrix, ColAccess) {

View File

@ -5,6 +5,7 @@
#include "xgboost/c_api.h"
#include <random>
#include <cinttypes>
#include <dmlc/filesystem.h>
bool FileExists(const std::string& filename) {
struct stat st;
@ -142,4 +143,38 @@ std::shared_ptr<xgboost::DMatrix>* CreateDMatrix(int rows, int columns,
return static_cast<std::shared_ptr<xgboost::DMatrix> *>(handle);
}
// Test helper: writes a libsvm file into a scratch directory, loads it as a
// DMatrix through an on-disk cache, verifies with gtest expectations that the
// data was split into exactly two row batches, and hands the matrix to the
// caller.
std::unique_ptr<DMatrix> CreateSparsePageDMatrix() {
  // NOTE(review): `tempdir` goes out of scope when this function returns,
  // while the returned matrix was loaded with a cache path inside it --
  // confirm the matrix does not need those files afterwards.
  dmlc::TemporaryDirectory tempdir;
  const std::string tmp_file = tempdir.path + "/big.libsvm";

  // The data has to be big enough to span two row pages.
  CreateBigTestData(tmp_file, 12);

  // "<data file>#<cache prefix>" is the URI handed to the loader.
  const std::string uri = tmp_file + "#" + tmp_file + ".cache";
  std::unique_ptr<DMatrix> dmat(
      DMatrix::Load(uri, true, false, "auto", 64UL));

  // Loading is expected to have produced the row-page cache file.
  EXPECT_TRUE(FileExists(tmp_file + ".cache.row.page"));

  // Walk every row batch, tallying batches and the rows they hold.
  int64_t row_count = 0;
  int64_t batch_count = 0;
  for (const auto &page : dmat->GetRowBatches()) {
    ++batch_count;
    row_count += page.Size();
  }
  EXPECT_EQ(batch_count, 2);
  EXPECT_EQ(row_count, dmat->Info().num_row_);

  return dmat;
}
// Test helper: returns a GBTreeModel containing one committed RegTree whose
// node 0 is a leaf with value 1.5 (node-0 sum_hess = 1.0). The model is set
// up with a single output group and a base margin of 0.
gbm::GBTreeModel CreateTestModel() {
  // Configure the tree before it is placed into the container.
  std::unique_ptr<RegTree> stump(new RegTree);
  (*stump)[0].SetLeaf(1.5f);
  stump->Stat(0).sum_hess = 1.0f;

  std::vector<std::unique_ptr<RegTree>> forest;
  forest.push_back(std::move(stump));

  gbm::GBTreeModel model(0.5);
  // Ownership of the tree is transferred to the model.
  model.CommitModel(std::move(forest), 0);
  model.param.num_output_group = 1;
  model.base_margin = 0;
  return model;
}
} // namespace xgboost

View File

@ -17,6 +17,7 @@
#include <xgboost/base.h>
#include <xgboost/objective.h>
#include <xgboost/metric.h>
#include <xgboost/predictor.h>
#if defined(__CUDACC__)
#define DeclareUnifiedTest(name) GPU ## name
@ -153,5 +154,9 @@ class SimpleRealUniformDistribution {
std::shared_ptr<xgboost::DMatrix> *CreateDMatrix(int rows, int columns,
float sparsity, int seed = 0);
// Test helper: generates "big.libsvm" in a temporary directory, loads it as a
// DMatrix through an on-disk cache, and checks via gtest EXPECT_* that the
// data yields exactly two row batches. The caller owns the returned matrix.
std::unique_ptr<DMatrix> CreateSparsePageDMatrix();
// Test helper: returns a GBTreeModel holding one committed RegTree whose
// node 0 is a leaf with value 1.5; num_output_group is 1 and base_margin is 0.
gbm::GBTreeModel CreateTestModel();
} // namespace xgboost
#endif

View File

@ -9,14 +9,7 @@ TEST(cpu_predictor, Test) {
std::unique_ptr<Predictor> cpu_predictor =
std::unique_ptr<Predictor>(Predictor::Create("cpu_predictor"));
std::vector<std::unique_ptr<RegTree>> trees;
trees.push_back(std::unique_ptr<RegTree>(new RegTree));
(*trees.back())[0].SetLeaf(1.5f);
(*trees.back()).Stat(0).sum_hess = 1.0f;
gbm::GBTreeModel model(0.5);
model.CommitModel(std::move(trees), 0);
model.param.num_output_group = 1;
model.base_margin = 0;
gbm::GBTreeModel model = CreateTestModel();
int n_row = 5;
int n_col = 5;
@ -62,34 +55,16 @@ TEST(cpu_predictor, Test) {
}
TEST(cpu_predictor, ExternalMemoryTest) {
// Create sufficiently large data to make two row pages
dmlc::TemporaryDirectory tempdir;
const std::string tmp_file = tempdir.path + "/big.libsvm";
CreateBigTestData(tmp_file, 12);
xgboost::DMatrix *dmat = xgboost::DMatrix::Load(
tmp_file + "#" + tmp_file + ".cache", true, false, "auto", 64UL);
EXPECT_TRUE(FileExists(tmp_file + ".cache.row.page"));
int64_t batche_count = 0;
for (const auto &batch : dmat->GetRowBatches()) {
batche_count++;
}
EXPECT_EQ(batche_count, 2);
std::unique_ptr<DMatrix> dmat = CreateSparsePageDMatrix();
std::unique_ptr<Predictor> cpu_predictor =
std::unique_ptr<Predictor>(Predictor::Create("cpu_predictor"));
std::vector<std::unique_ptr<RegTree>> trees;
trees.push_back(std::unique_ptr<RegTree>(new RegTree));
(*trees.back())[0].SetLeaf(1.5f);
(*trees.back()).Stat(0).sum_hess = 1.0f;
gbm::GBTreeModel model(0.5);
model.CommitModel(std::move(trees), 0);
model.param.num_output_group = 1;
model.base_margin = 0;
gbm::GBTreeModel model = CreateTestModel();
// Test predict batch
HostDeviceVector<float> out_predictions;
cpu_predictor->PredictBatch(dmat, &out_predictions, model, 0);
cpu_predictor->PredictBatch(dmat.get(), &out_predictions, model, 0);
std::vector<float> &out_predictions_h = out_predictions.HostVector();
EXPECT_EQ(out_predictions.Size(), dmat->Info().num_row_);
for (const auto& v : out_predictions_h) {
@ -98,7 +73,7 @@ TEST(cpu_predictor, ExternalMemoryTest) {
// Test predict leaf
std::vector<float> leaf_out_predictions;
cpu_predictor->PredictLeaf(dmat, &leaf_out_predictions, model);
cpu_predictor->PredictLeaf(dmat.get(), &leaf_out_predictions, model);
EXPECT_EQ(leaf_out_predictions.size(), dmat->Info().num_row_);
for (const auto& v : leaf_out_predictions) {
ASSERT_EQ(v, 0);
@ -106,7 +81,7 @@ TEST(cpu_predictor, ExternalMemoryTest) {
// Test predict contribution
std::vector<float> out_contribution;
cpu_predictor->PredictContribution(dmat, &out_contribution, model);
cpu_predictor->PredictContribution(dmat.get(), &out_contribution, model);
EXPECT_EQ(out_contribution.size(), dmat->Info().num_row_);
for (const auto& v : out_contribution) {
ASSERT_EQ(v, 1.5);
@ -114,12 +89,10 @@ TEST(cpu_predictor, ExternalMemoryTest) {
// Test predict contribution (approximate method)
std::vector<float> out_contribution_approximate;
cpu_predictor->PredictContribution(dmat, &out_contribution_approximate, model, true);
cpu_predictor->PredictContribution(dmat.get(), &out_contribution_approximate, model, true);
EXPECT_EQ(out_contribution_approximate.size(), dmat->Info().num_row_);
for (const auto& v : out_contribution_approximate) {
ASSERT_EQ(v, 1.5);
}
delete dmat;
}
} // namespace xgboost

View File

@ -33,13 +33,7 @@ TEST(gpu_predictor, Test) {
gpu_predictor->Init({}, {});
cpu_predictor->Init({}, {});
std::vector<std::unique_ptr<RegTree>> trees;
trees.push_back(std::unique_ptr<RegTree>(new RegTree()));
(*trees.back())[0].SetLeaf(1.5f);
(*trees.back()).Stat(0).sum_hess = 1.0f;
gbm::GBTreeModel model(0.5);
model.CommitModel(std::move(trees), 0);
model.param.num_output_group = 1;
gbm::GBTreeModel model = CreateTestModel();
int n_row = 5;
int n_col = 5;
@ -181,13 +175,7 @@ TEST(gpu_predictor, MGPU_Test) {
int n_row = i, n_col = i;
auto dmat = CreateDMatrix(n_row, n_col, 0);
std::vector<std::unique_ptr<RegTree>> trees;
trees.push_back(std::unique_ptr<RegTree>(new RegTree()));
(*trees.back())[0].SetLeaf(1.5f);
(*trees.back()).Stat(0).sum_hess = 1.0f;
gbm::GBTreeModel model(0.5);
model.CommitModel(std::move(trees), 0);
model.param.num_output_group = 1;
gbm::GBTreeModel model = CreateTestModel();
// Test predict batch
HostDeviceVector<float> gpu_out_predictions;