Refactor fast-hist, add tests for some updaters. (#3836)

Add unittest for prune.

Add unittest for refresh.

Refactor fast_hist.

* Remove fast_hist_param.
* Rename to quantile_hist.

Add unittests for QuantileHist.

* Refactor QuantileHist into .h and .cc file.
* Remove sync.h.
* Remove MGPU_mock test.

Rename fast hist method to quantile hist.
This commit is contained in:
Jiaming Yuan
2018-11-07 21:15:07 +13:00
committed by GitHub
parent 2b045aa805
commit 19ee0a3579
30 changed files with 1366 additions and 983 deletions

View File

@@ -328,8 +328,8 @@ TEST(GpuHist, ApplySplit) {
shard->row_stride = n_cols;
thrust::sequence(shard->ridx.CurrentDVec().tbegin(),
shard->ridx.CurrentDVec().tend());
// Free inside DeviceShard
dh::safe_cuda(cudaMallocHost(&(shard->tmp_pinned), sizeof(int64_t)));
// Initialize GPUHistMaker
hist_maker.param_ = param;
RegTree tree;
@@ -390,15 +390,5 @@ TEST(GpuHist, ApplySplit) {
ASSERT_EQ(shard->ridx_segments[right_nidx].end, 16);
}
// Smoke test for multi-GPU selection: reads the CUDA device count, requires
// more than one device through CHECK_GT, then makes each device index current
// in turn with cudaSetDevice.
TEST(GpuHist, MGPU_mock) {
  int device_count;
  dh::safe_cuda(cudaGetDeviceCount(&device_count));
  CHECK_GT(device_count, 1);

  // Attempt to choose multiple GPU devices
  int device = 0;
  while (device < device_count) {
    dh::safe_cuda(cudaSetDevice(device));
    ++device;
  }
}
} // namespace tree
} // namespace xgboost

View File

@@ -1,13 +1,13 @@
// Copyright by Contributors
#include "../../../src/tree/param.h"
#include "../helpers.h"
#include <gtest/gtest.h>
TEST(Param, VectorIOStream) {
std::vector<int> vals = {3, 2, 1};
std::stringstream ss;
std::vector<int> vals_in;
ss << vals;
EXPECT_EQ(ss.str(), "(3,2,1)");

View File

@@ -0,0 +1,72 @@
/*!
* Copyright 2018 by Contributors
*/
#include "../helpers.h"
#include "../../../src/common/host_device_vector.h"
#include <xgboost/tree_updater.h>
#include <gtest/gtest.h>
#include <vector>
#include <string>
#include <memory>
namespace xgboost {
namespace tree {
/*!
 * \brief Checks the "prune" updater against a min_split_loss of 10.
 *
 * A root with two leaf children is built three times over and handed to the
 * pruner:
 *  - with the root's loss_chg left untouched, the split is expected to be
 *    removed (no extra nodes remain);
 *  - with loss_chg = 11 the split is expected to survive (two extra nodes);
 *  - with loss_chg = 10 (equal to the threshold) the split is expected to
 *    survive as well.
 */
TEST(Updater, Prune) {
  int constexpr n_rows = 32;
  int constexpr n_cols = 16;

  std::vector<std::pair<std::string, std::string>> cfg;
  cfg.emplace_back("num_feature", std::to_string(n_cols));
  cfg.emplace_back("min_split_loss", "10");
  cfg.emplace_back("silent", "1");

  // These data are just place holders.
  HostDeviceVector<GradientPair> gpair =
      { {0.50f, 0.25f}, {0.50f, 0.25f}, {0.50f, 0.25f}, {0.50f, 0.25f},
        {0.25f, 0.24f}, {0.25f, 0.24f}, {0.25f, 0.24f}, {0.25f, 0.24f} };

  // CreateDMatrix hands back a heap-allocated shared_ptr. Holding it in a
  // unique_ptr releases it on every exit path, including the early return
  // that a failing ASSERT_* performs.
  std::unique_ptr<std::shared_ptr<DMatrix>> dmat(
      CreateDMatrix(n_rows, n_cols, 0.4, 3));

  // prepare tree
  RegTree tree = RegTree();
  tree.InitModel();
  tree.param.InitAllowUnknown(cfg);
  std::vector<RegTree*> trees {&tree};

  // prepare pruner
  std::unique_ptr<TreeUpdater> pruner(TreeUpdater::Create("prune"));
  pruner->Init(cfg);

  // loss_chg < min_split_loss;
  tree.AddChilds(0);
  int cleft = tree[0].LeftChild();
  int cright = tree[0].RightChild();
  tree[cleft].SetLeaf(0.3f, 0);
  tree[cright].SetLeaf(0.4f, 0);
  pruner->Update(&gpair, dmat->get(), trees);
  ASSERT_EQ(tree.NumExtraNodes(), 0);

  // loss_chg > min_split_loss;
  tree.AddChilds(0);
  cleft = tree[0].LeftChild();
  cright = tree[0].RightChild();
  tree[cleft].SetLeaf(0.3f, 0);
  tree[cright].SetLeaf(0.4f, 0);
  tree.Stat(0).loss_chg = 11;
  pruner->Update(&gpair, dmat->get(), trees);
  ASSERT_EQ(tree.NumExtraNodes(), 2);

  // loss_chg == min_split_loss;
  tree.Stat(0).loss_chg = 10;
  pruner->Update(&gpair, dmat->get(), trees);
  ASSERT_EQ(tree.NumExtraNodes(), 2);
}
} // namespace tree
} // namespace xgboost

View File

@@ -0,0 +1,181 @@
/*!
* Copyright 2018 by Contributors
*/
#include "../helpers.h"
#include "../../../src/tree/param.h"
#include "../../../src/tree/updater_quantile_hist.h"
#include "../../../src/common/host_device_vector.h"
#include <xgboost/tree_updater.h>
#include <gtest/gtest.h>
#include <vector>
#include <string>
namespace xgboost {
namespace tree {
class QuantileHistMock : public QuantileHistMaker {
static double constexpr kEps = 1e-6;
struct BuilderMock : public QuantileHistMaker::Builder {
using RealImpl = QuantileHistMaker::Builder;
BuilderMock(const TrainParam& param,
std::unique_ptr<TreeUpdater> pruner,
std::unique_ptr<SplitEvaluator> spliteval)
: RealImpl(param, std::move(pruner), std::move(spliteval)) {}
public:
void TestInitData(const GHistIndexMatrix& gmat,
const std::vector<GradientPair>& gpair,
const DMatrix& fmat,
const RegTree& tree) {
RealImpl::InitData(gmat, gpair, fmat, tree);
ASSERT_EQ(data_layout_, kSparseData);
}
void TestBuildHist(int nid,
const GHistIndexMatrix& gmat,
const DMatrix& fmat,
const RegTree& tree) {
std::vector<GradientPair> gpair =
{ {0.23f, 0.24f}, {0.24f, 0.25f}, {0.26f, 0.27f}, {0.27f, 0.28f},
{0.27f, 0.29f}, {0.37f, 0.39f}, {0.47f, 0.49f}, {0.57f, 0.59f} };
RealImpl::InitData(gmat, gpair, fmat, tree);
GHistIndexBlockMatrix quantile_index_block;
hist_.AddHistRow(nid);
BuildHist(gpair, row_set_collection_[nid],
gmat, quantile_index_block, hist_[nid]);
std::vector<GradientPairPrecise> solution {
{0.27, 0.29}, {0.27, 0.29}, {0.47, 0.49},
{0.27, 0.29}, {0.57, 0.59}, {0.26, 0.27},
{0.37, 0.39}, {0.23, 0.24}, {0.37, 0.39},
{0.27, 0.28}, {0.27, 0.29}, {0.37, 0.39},
{0.26, 0.27}, {0.23, 0.24}, {0.57, 0.59},
{0.47, 0.49}, {0.47, 0.49}, {0.37, 0.39},
{0.26, 0.27}, {0.23, 0.24}, {0.27, 0.28},
{0.57, 0.59}, {0.23, 0.24}, {0.47, 0.49}};
for (size_t i = 0; i < hist_[nid].size; ++i) {
GradientPairPrecise sol = solution[i];
ASSERT_NEAR(sol.GetGrad(), hist_[nid].begin[i].sum_grad, kEps);
ASSERT_NEAR(sol.GetHess(), hist_[nid].begin[i].sum_hess, kEps);
}
}
void TestEvaluateSplit(const GHistIndexBlockMatrix& quantile_index_block,
const RegTree& tree) {
std::vector<GradientPair> row_gpairs =
{ {0.23f, 0.24f}, {0.24f, 0.25f}, {0.26f, 0.27f}, {0.27f, 0.28f},
{0.27f, 0.29f}, {0.37f, 0.39f}, {0.47f, 0.49f}, {0.57f, 0.59f} };
size_t constexpr max_bins = 4;
auto dmat = CreateDMatrix(n_rows, n_cols, 0, 3); // dense
common::GHistIndexMatrix gmat;
gmat.Init((*dmat).get(), max_bins);
RealImpl::InitData(gmat, row_gpairs, *(*dmat), tree);
hist_.AddHistRow(0);
BuildHist(row_gpairs, row_set_collection_[0],
gmat, quantile_index_block, hist_[0]);
RealImpl::InitNewNode(0, gmat, row_gpairs, *(*dmat), tree);
// Manipulate the root_gain so that I don't have to invent an actual
// split. Yes, I'm cheating.
snode_[0].root_gain = 0.8;
RealImpl::EvaluateSplit(0, gmat, hist_, *(*dmat), tree);
ASSERT_NEAR(snode_.at(0).best.loss_chg, 0.7128048, kEps);
ASSERT_EQ(snode_.at(0).best.SplitIndex(), 10);
ASSERT_NEAR(snode_.at(0).best.split_value, 0.182258, kEps);
delete dmat;
}
};
int static constexpr n_rows = 8, n_cols = 16;
std::shared_ptr<xgboost::DMatrix> *dmat;
const std::vector<std::pair<std::string, std::string> > cfg;
std::shared_ptr<BuilderMock> builder_;
public:
explicit QuantileHistMock(
const std::vector<std::pair<std::string, std::string> >& args) :
cfg{args} {
QuantileHistMaker::Init(args);
builder_.reset(
new BuilderMock(
param_,
std::move(pruner_),
std::unique_ptr<SplitEvaluator>(spliteval_->GetHostClone())));
dmat = CreateDMatrix(n_rows, n_cols, 0.8, 3);
}
~QuantileHistMock() { delete dmat; }
static size_t GetNumColumns() { return n_cols; }
void TestInitData() {
size_t constexpr max_bins = 4;
common::GHistIndexMatrix gmat;
gmat.Init((*dmat).get(), max_bins);
RegTree tree = RegTree();
tree.InitModel();
tree.param.InitAllowUnknown(cfg);
std::vector<GradientPair> gpair =
{ {0.23f, 0.24f}, {0.23f, 0.24f}, {0.23f, 0.24f}, {0.23f, 0.24f},
{0.27f, 0.29f}, {0.27f, 0.29f}, {0.27f, 0.29f}, {0.27f, 0.29f} };
builder_->TestInitData(gmat, gpair, *(*dmat), tree);
}
void TestBuildHist() {
RegTree tree = RegTree();
tree.InitModel();
tree.param.InitAllowUnknown(cfg);
size_t constexpr max_bins = 4;
common::GHistIndexMatrix gmat;
gmat.Init((*dmat).get(), max_bins);
builder_->TestBuildHist(0, gmat, *(*dmat).get(), tree);
}
void TestEvaluateSplit() {
RegTree tree = RegTree();
tree.InitModel();
tree.param.InitAllowUnknown(cfg);
builder_->TestEvaluateSplit(gmatb_, tree);
}
};
// Configures the mock with only the feature count and runs its InitData check.
TEST(Updater, QuantileHist_InitData) {
  using Args = std::vector<std::pair<std::string, std::string>>;

  Args args;
  args.emplace_back("num_feature",
                    std::to_string(QuantileHistMock::GetNumColumns()));

  QuantileHistMock mock(args);
  mock.TestInitData();
}
// Runs the mock's histogram-building check.
TEST(Updater, QuantileHist_BuildHist) {
  using Args = std::vector<std::pair<std::string, std::string>>;

  Args args;
  args.emplace_back("num_feature",
                    std::to_string(QuantileHistMock::GetNumColumns()));
  // Don't enable feature grouping
  args.emplace_back("enable_feature_grouping", std::to_string(0));

  QuantileHistMock mock(args);
  mock.TestBuildHist();
}
// Runs the mock's split-evaluation check with the "elastic_net" split
// evaluator selected.
TEST(Updater, QuantileHist_EvalSplits) {
  using Args = std::vector<std::pair<std::string, std::string>>;

  Args args;
  args.emplace_back("num_feature",
                    std::to_string(QuantileHistMock::GetNumColumns()));
  args.emplace_back("split_evaluator", "elastic_net");

  QuantileHistMock mock(args);
  mock.TestEvaluateSplit();
}
} // namespace tree
} // namespace xgboost

View File

@@ -0,0 +1,57 @@
/*!
* Copyright 2018 by Contributors
*/
#include "../helpers.h"
#include "../../../src/common/host_device_vector.h"
#include <xgboost/tree_updater.h>
#include <gtest/gtest.h>
#include <vector>
#include <string>
#include <memory>
namespace xgboost {
namespace tree {
/*!
 * \brief Checks the "refresh" updater on a hand-built tree.
 *
 * A root split on feature 2 at 0.2 with two leaf children is created, the
 * updater is run once over fixed gradient pairs, and the right leaf value
 * and the recorded loss changes are compared with reference numbers.
 */
TEST(Updater, Refresh) {
  int constexpr n_rows = 8;
  int constexpr n_cols = 16;

  HostDeviceVector<GradientPair> gpair =
      { {0.23f, 0.24f}, {0.23f, 0.24f}, {0.23f, 0.24f}, {0.23f, 0.24f},
        {0.27f, 0.29f}, {0.27f, 0.29f}, {0.27f, 0.29f}, {0.27f, 0.29f} };

  // CreateDMatrix hands back a heap-allocated shared_ptr. Holding it in a
  // unique_ptr releases it on every exit path, including the early return
  // that a failing ASSERT_* performs.
  std::unique_ptr<std::shared_ptr<DMatrix>> dmat(
      CreateDMatrix(n_rows, n_cols, 0.4, 3));

  std::vector<std::pair<std::string, std::string>> cfg {
    {"reg_alpha", "0.0"},
    {"num_feature", std::to_string(n_cols)},
    {"reg_lambda", "1"}};

  RegTree tree = RegTree();
  tree.InitModel();
  tree.param.InitAllowUnknown(cfg);
  std::vector<RegTree*> trees {&tree};
  std::unique_ptr<TreeUpdater> refresher(TreeUpdater::Create("refresh"));

  // Build a single split with two leaves for the updater to work on.
  tree.AddChilds(0);
  int cleft = tree[0].LeftChild();
  int cright = tree[0].RightChild();
  tree[cleft].SetLeaf(0.2f, 0);
  tree[cright].SetLeaf(0.8f, 0);
  tree[0].SetSplit(2, 0.2f);

  tree.Stat(cleft).base_weight = 1.2;
  tree.Stat(cright).base_weight = 1.3;

  refresher->Init(cfg);
  refresher->Update(&gpair, dmat->get(), trees);

  bst_float constexpr kEps = 1e-6;
  ASSERT_NEAR(-0.183392, tree[cright].LeafValue(), kEps);
  ASSERT_NEAR(-0.224489, tree.Stat(0).loss_chg, kEps);
  ASSERT_NEAR(0, tree.Stat(cleft).loss_chg, kEps);
  ASSERT_NEAR(0, tree.Stat(1).loss_chg, kEps);
  ASSERT_NEAR(0, tree.Stat(2).loss_chg, kEps);
}
} // namespace tree
} // namespace xgboost