Refactor tests with data generator. (#5439)

This commit is contained in:
Jiaming Yuan
2020-03-27 06:44:44 +08:00
committed by GitHub
parent 7146b91d5a
commit 4942da64ae
26 changed files with 334 additions and 259 deletions

View File

@@ -11,11 +11,10 @@ void TestDeterminsticHistogram() {
size_t constexpr kBins = 24, kCols = 8, kRows = 32768, kRounds = 16;
float constexpr kLower = -1e-2, kUpper = 1e2;
auto pp_m = CreateDMatrix(kRows, kCols, 0.5);
auto& matrix = **pp_m;
auto matrix = RandomDataGenerator(kRows, kCols, 0.5).GenerateDMatix();
BatchParam batch_param{0, static_cast<int32_t>(kBins), 0};
for (auto const& batch : matrix.GetBatches<EllpackPage>(batch_param)) {
for (auto const& batch : matrix->GetBatches<EllpackPage>(batch_param)) {
auto* page = batch.Impl();
tree::RowPartitioner row_partitioner(0, kRows);
@@ -58,7 +57,6 @@ void TestDeterminsticHistogram() {
}
}
}
delete pp_m;
}
TEST(Histogram, GPUDeterminstic) {

View File

@@ -313,22 +313,21 @@ TEST(GpuHist, MinSplitLoss) {
constexpr size_t kRows = 32;
constexpr size_t kCols = 16;
constexpr float kSparsity = 0.6;
auto dmat = CreateDMatrix(kRows, kCols, kSparsity, 3);
auto dmat = RandomDataGenerator(kRows, kCols, kSparsity).Seed(3).GenerateDMatix();
auto gpair = GenerateRandomGradients(kRows);
{
int32_t n_nodes = TestMinSplitLoss((*dmat).get(), 0.01, &gpair);
int32_t n_nodes = TestMinSplitLoss(dmat.get(), 0.01, &gpair);
// This is not strictly verified, meaning the number `2` is whatever GPU_Hist returned
// when writing this test, and is only used for testing that a larger gamma (below) does
// prevent building the tree.
ASSERT_EQ(n_nodes, 2);
}
{
int32_t n_nodes = TestMinSplitLoss((*dmat).get(), 100.0, &gpair);
int32_t n_nodes = TestMinSplitLoss(dmat.get(), 100.0, &gpair);
// No new nodes with gamma == 100.
ASSERT_EQ(n_nodes, static_cast<decltype(n_nodes)>(0));
}
delete dmat;
}
void UpdateTree(HostDeviceVector<GradientPair>* gpair, DMatrix* dmat,

View File

@@ -15,8 +15,7 @@ TEST(GrowHistMaker, InteractionConstraint) {
GenericParameter param;
param.UpdateAllowUnknown(Args{{"gpu_id", "0"}});
auto pp_dmat = CreateDMatrix(kRows, kCols, 0.6, 3);
auto p_dmat = *pp_dmat;
auto p_dmat = RandomDataGenerator{kRows, kCols, 0.6f}.Seed(3).GenerateDMatix();
HostDeviceVector<GradientPair> gradients (kRows);
std::vector<GradientPair>& h_gradients = gradients.HostVector();
@@ -62,7 +61,6 @@ TEST(GrowHistMaker, InteractionConstraint) {
ASSERT_NE(tree[tree[0].LeftChild()].SplitIndex(), 0);
ASSERT_NE(tree[tree[0].RightChild()].SplitIndex(), 0);
}
delete pp_dmat;
}
} // namespace tree

View File

@@ -28,7 +28,8 @@ TEST(Updater, Prune) {
HostDeviceVector<GradientPair> gpair =
{ {0.50f, 0.25f}, {0.50f, 0.25f}, {0.50f, 0.25f}, {0.50f, 0.25f},
{0.25f, 0.24f}, {0.25f, 0.24f}, {0.25f, 0.24f}, {0.25f, 0.24f} };
auto dmat = CreateDMatrix(32, kCols, 0.4, 3);
std::shared_ptr<DMatrix> p_dmat {
RandomDataGenerator{32, 10, 0}.GenerateDMatix() };
auto lparam = CreateEmptyGenericParam(GPUIDX);
@@ -42,19 +43,19 @@ TEST(Updater, Prune) {
// loss_chg < min_split_loss;
tree.ExpandNode(0, 0, 0, true, 0.0f, 0.3f, 0.4f, 0.0f, 0.0f);
pruner->Update(&gpair, dmat->get(), trees);
pruner->Update(&gpair, p_dmat.get(), trees);
ASSERT_EQ(tree.NumExtraNodes(), 0);
// loss_chg > min_split_loss;
tree.ExpandNode(0, 0, 0, true, 0.0f, 0.3f, 0.4f, 11.0f, 0.0f);
pruner->Update(&gpair, dmat->get(), trees);
pruner->Update(&gpair, p_dmat.get(), trees);
ASSERT_EQ(tree.NumExtraNodes(), 2);
// loss_chg == min_split_loss;
tree.Stat(0).loss_chg = 10;
pruner->Update(&gpair, dmat->get(), trees);
pruner->Update(&gpair, p_dmat.get(), trees);
ASSERT_EQ(tree.NumExtraNodes(), 2);
@@ -68,7 +69,7 @@ TEST(Updater, Prune) {
/*loss_chg=*/19.0f, 0.0f);
cfg.emplace_back(std::make_pair("max_depth", "1"));
pruner->Configure(cfg);
pruner->Update(&gpair, dmat->get(), trees);
pruner->Update(&gpair, p_dmat.get(), trees);
ASSERT_EQ(tree.NumExtraNodes(), 2);
@@ -77,10 +78,8 @@ TEST(Updater, Prune) {
/*loss_chg=*/18.0f, 0.0f);
cfg.emplace_back(std::make_pair("min_split_loss", "0"));
pruner->Configure(cfg);
pruner->Update(&gpair, dmat->get(), trees);
pruner->Update(&gpair, p_dmat.get(), trees);
ASSERT_EQ(tree.NumExtraNodes(), 2);
delete dmat;
}
} // namespace tree
} // namespace xgboost

View File

@@ -139,23 +139,23 @@ class QuantileHistMock : public QuantileHistMaker {
{ {1.23f, 0.24f}, {0.24f, 0.25f}, {0.26f, 0.27f}, {2.27f, 0.28f},
{0.27f, 0.29f}, {0.37f, 0.39f}, {-0.47f, 0.49f}, {0.57f, 0.59f} };
size_t constexpr kMaxBins = 4;
auto dmat = CreateDMatrix(kNRows, kNCols, 0, 3);
auto dmat = RandomDataGenerator(kNRows, kNCols, 0).Seed(3).GenerateDMatix();
// dense, no missing values
common::GHistIndexMatrix gmat;
gmat.Init((*dmat).get(), kMaxBins);
gmat.Init(dmat.get(), kMaxBins);
RealImpl::InitData(gmat, row_gpairs, *(*dmat), tree);
RealImpl::InitData(gmat, row_gpairs, *dmat, tree);
hist_.AddHistRow(0);
BuildHist(row_gpairs, row_set_collection_[0],
gmat, quantile_index_block, hist_[0]);
RealImpl::InitNewNode(0, gmat, row_gpairs, *(*dmat), tree);
RealImpl::InitNewNode(0, gmat, row_gpairs, *dmat, tree);
/* Compute correct split (best_split) using the computed histogram */
const size_t num_row = dmat->get()->Info().num_row_;
const size_t num_feature = dmat->get()->Info().num_col_;
const size_t num_row = dmat->Info().num_row_;
const size_t num_feature = dmat->Info().num_col_;
CHECK_EQ(num_row, row_gpairs.size());
// Compute total gradient for all data points
GradientPairPrecise total_gpair;
@@ -216,8 +216,6 @@ class QuantileHistMock : public QuantileHistMaker {
RealImpl::EvaluateSplits({node}, gmat, hist_, tree);
ASSERT_EQ(snode_[0].best.SplitIndex(), best_split_feature);
ASSERT_EQ(snode_[0].best.split_value, gmat.cut.Values()[best_split_threshold]);
delete dmat;
}
void TestEvaluateSplitParallel(const GHistIndexBlockMatrix &quantile_index_block,
@@ -230,7 +228,7 @@ class QuantileHistMock : public QuantileHistMaker {
};
int static constexpr kNRows = 8, kNCols = 16;
std::shared_ptr<xgboost::DMatrix> *dmat_;
std::shared_ptr<xgboost::DMatrix> dmat_;
const std::vector<std::pair<std::string, std::string> > cfg_;
std::shared_ptr<BuilderMock> builder_;
@@ -240,23 +238,23 @@ class QuantileHistMock : public QuantileHistMaker {
cfg_{args} {
QuantileHistMaker::Configure(args);
spliteval_->Init(&param_);
dmat_ = CreateDMatrix(kNRows, kNCols, 0.8, 3);
dmat_ = RandomDataGenerator(kNRows, kNCols, 0.8).Seed(3).GenerateDMatix();
builder_.reset(
new BuilderMock(
param_,
std::move(pruner_),
std::unique_ptr<SplitEvaluator>(spliteval_->GetHostClone()),
int_constraint_,
dmat_->get()));
dmat_.get()));
}
~QuantileHistMock() override { delete dmat_; }
~QuantileHistMock() override = default;
static size_t GetNumColumns() { return kNCols; }
void TestInitData() {
size_t constexpr kMaxBins = 4;
common::GHistIndexMatrix gmat;
gmat.Init((*dmat_).get(), kMaxBins);
gmat.Init(dmat_.get(), kMaxBins);
RegTree tree = RegTree();
tree.param.UpdateAllowUnknown(cfg_);
@@ -265,7 +263,7 @@ class QuantileHistMock : public QuantileHistMaker {
{ {0.23f, 0.24f}, {0.23f, 0.24f}, {0.23f, 0.24f}, {0.23f, 0.24f},
{0.27f, 0.29f}, {0.27f, 0.29f}, {0.27f, 0.29f}, {0.27f, 0.29f} };
builder_->TestInitData(gmat, gpair, dmat_->get(), tree);
builder_->TestInitData(gmat, gpair, dmat_.get(), tree);
}
void TestBuildHist() {
@@ -274,9 +272,9 @@ class QuantileHistMock : public QuantileHistMaker {
size_t constexpr kMaxBins = 4;
common::GHistIndexMatrix gmat;
gmat.Init((*dmat_).get(), kMaxBins);
gmat.Init(dmat_.get(), kMaxBins);
builder_->TestBuildHist(0, gmat, *(*dmat_).get(), tree);
builder_->TestBuildHist(0, gmat, *dmat_, tree);
}
void TestEvaluateSplit() {

View File

@@ -15,16 +15,18 @@ namespace xgboost {
namespace tree {
TEST(Updater, Refresh) {
int constexpr kNRows = 8, kNCols = 16;
bst_row_t constexpr kRows = 8;
bst_feature_t constexpr kCols = 16;
HostDeviceVector<GradientPair> gpair =
{ {0.23f, 0.24f}, {0.23f, 0.24f}, {0.23f, 0.24f}, {0.23f, 0.24f},
{0.27f, 0.29f}, {0.27f, 0.29f}, {0.27f, 0.29f}, {0.27f, 0.29f} };
auto dmat = CreateDMatrix(kNRows, kNCols, 0.4, 3);
std::vector<std::pair<std::string, std::string>> cfg {
{"reg_alpha", "0.0"},
{"num_feature", std::to_string(kNCols)},
{"reg_lambda", "1"}};
std::shared_ptr<DMatrix> p_dmat{
RandomDataGenerator{kRows, kCols, 0.4f}.Seed(3).GenerateDMatix()};
std::vector<std::pair<std::string, std::string>> cfg{
{"reg_alpha", "0.0"},
{"num_feature", std::to_string(kCols)},
{"reg_lambda", "1"}};
RegTree tree = RegTree();
auto lparam = CreateEmptyGenericParam(GPUIDX);
@@ -40,7 +42,7 @@ TEST(Updater, Refresh) {
tree.Stat(cright).base_weight = 1.3;
refresher->Configure(cfg);
refresher->Update(&gpair, dmat->get(), trees);
refresher->Update(&gpair, p_dmat.get(), trees);
bst_float constexpr kEps = 1e-6;
ASSERT_NEAR(-0.183392, tree[cright].LeafValue(), kEps);
@@ -48,8 +50,6 @@ TEST(Updater, Refresh) {
ASSERT_NEAR(0, tree.Stat(cleft).loss_chg, kEps);
ASSERT_NEAR(0, tree.Stat(1).loss_chg, kEps);
ASSERT_NEAR(0, tree.Stat(2).loss_chg, kEps);
delete dmat;
}
} // namespace tree