Refactor fast-hist, add tests for some updaters. (#3836)
Add unit test for prune. Add unit test for refresh. Refactor fast_hist:
* Remove fast_hist_param.
* Rename to quantile_hist. Add unit tests for QuantileHist.
* Refactor QuantileHist into a .h and a .cc file.
* Remove sync.h.
* Remove MGPU_mock test.
Rename the fast hist method to quantile hist.
This commit is contained in:
@@ -328,8 +328,8 @@ TEST(GpuHist, ApplySplit) {
|
||||
shard->row_stride = n_cols;
|
||||
thrust::sequence(shard->ridx.CurrentDVec().tbegin(),
|
||||
shard->ridx.CurrentDVec().tend());
|
||||
// Free inside DeviceShard
|
||||
dh::safe_cuda(cudaMallocHost(&(shard->tmp_pinned), sizeof(int64_t)));
|
||||
|
||||
// Initialize GPUHistMaker
|
||||
hist_maker.param_ = param;
|
||||
RegTree tree;
|
||||
@@ -390,15 +390,5 @@ TEST(GpuHist, ApplySplit) {
|
||||
ASSERT_EQ(shard->ridx_segments[right_nidx].end, 16);
|
||||
}
|
||||
|
||||
// Multi-GPU smoke test: queries the CUDA device count, checks that it is
// greater than one, then selects every reported device in turn.
TEST(GpuHist, MGPU_mock) {
  int device_count;
  dh::safe_cuda(cudaGetDeviceCount(&device_count));
  // The test is only meaningful when several devices are available.
  CHECK_GT(device_count, 1);

  // Attempt to choose each GPU device, in ascending ordinal order.
  int device = 0;
  while (device < device_count) {
    dh::safe_cuda(cudaSetDevice(device));
    ++device;
  }
}
|
||||
|
||||
} // namespace tree
|
||||
} // namespace xgboost
|
||||
|
||||
@@ -1,13 +1,13 @@
|
||||
// Copyright by Contributors
|
||||
#include "../../../src/tree/param.h"
|
||||
|
||||
#include "../helpers.h"
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
TEST(Param, VectorIOStream) {
|
||||
std::vector<int> vals = {3, 2, 1};
|
||||
std::stringstream ss;
|
||||
std::vector<int> vals_in;
|
||||
|
||||
|
||||
ss << vals;
|
||||
EXPECT_EQ(ss.str(), "(3,2,1)");
|
||||
|
||||
|
||||
72
tests/cpp/tree/test_prune.cc
Normal file
72
tests/cpp/tree/test_prune.cc
Normal file
@@ -0,0 +1,72 @@
|
||||
/*!
|
||||
* Copyright 2018 by Contributors
|
||||
*/
|
||||
#include "../helpers.h"
|
||||
#include "../../../src/common/host_device_vector.h"
|
||||
#include <xgboost/tree_updater.h>
|
||||
#include <gtest/gtest.h>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <memory>
|
||||
|
||||
namespace xgboost {
|
||||
namespace tree {
|
||||
|
||||
// Exercises the "prune" tree updater on a tree consisting of a root with two
// leaf children, configured with min_split_loss = 10.
//
// Three situations are checked through RegTree::NumExtraNodes():
//   1. root loss_chg left untouched after AddChilds (labelled
//      "loss_chg < min_split_loss" by the test) -> 0 extra nodes remain;
//   2. root loss_chg = 11 -> both children are kept (2 extra nodes);
//   3. root loss_chg = 10 -> both children are kept (2 extra nodes).
TEST(Updater, Prune) {
  int constexpr n_rows = 32, n_cols = 16;

  std::vector<std::pair<std::string, std::string>> cfg {
      {"num_feature", std::to_string(n_cols)},
      {"min_split_loss", "10"},
      {"silent", "1"}};

  // These data are just place holders.
  HostDeviceVector<GradientPair> gpair =
      { {0.50f, 0.25f}, {0.50f, 0.25f}, {0.50f, 0.25f}, {0.50f, 0.25f},
        {0.25f, 0.24f}, {0.25f, 0.24f}, {0.25f, 0.24f}, {0.25f, 0.24f} };
  // CreateDMatrix hands back a heap-allocated shared_ptr.  Owning it through
  // a unique_ptr releases it on every exit path, including the early return
  // performed by a failing ASSERT_*.
  std::unique_ptr<std::shared_ptr<DMatrix>> dmat(
      CreateDMatrix(n_rows, n_cols, 0.4, 3));

  // prepare tree
  RegTree tree = RegTree();
  tree.InitModel();
  tree.param.InitAllowUnknown(cfg);
  std::vector<RegTree*> trees {&tree};
  // prepare pruner
  std::unique_ptr<TreeUpdater> pruner(TreeUpdater::Create("prune"));
  pruner->Init(cfg);

  // loss_chg < min_split_loss;
  tree.AddChilds(0);
  int cleft = tree[0].LeftChild();
  int cright = tree[0].RightChild();
  tree[cleft].SetLeaf(0.3f, 0);
  tree[cright].SetLeaf(0.4f, 0);
  pruner->Update(&gpair, dmat->get(), trees);

  ASSERT_EQ(tree.NumExtraNodes(), 0);

  // loss_chg > min_split_loss;
  tree.AddChilds(0);
  cleft = tree[0].LeftChild();
  cright = tree[0].RightChild();
  tree[cleft].SetLeaf(0.3f, 0);
  tree[cright].SetLeaf(0.4f, 0);
  tree.Stat(0).loss_chg = 11;
  pruner->Update(&gpair, dmat->get(), trees);

  ASSERT_EQ(tree.NumExtraNodes(), 2);

  // loss_chg == min_split_loss;
  tree.Stat(0).loss_chg = 10;
  pruner->Update(&gpair, dmat->get(), trees);

  ASSERT_EQ(tree.NumExtraNodes(), 2);
}
|
||||
|
||||
} // namespace tree
|
||||
} // namespace xgboost
|
||||
181
tests/cpp/tree/test_quantile_hist.cc
Normal file
181
tests/cpp/tree/test_quantile_hist.cc
Normal file
@@ -0,0 +1,181 @@
|
||||
/*!
|
||||
* Copyright 2018 by Contributors
|
||||
*/
|
||||
#include "../helpers.h"
|
||||
#include "../../../src/tree/param.h"
|
||||
#include "../../../src/tree/updater_quantile_hist.h"
|
||||
#include "../../../src/common/host_device_vector.h"
|
||||
|
||||
#include <xgboost/tree_updater.h>
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <vector>
|
||||
#include <string>
|
||||
|
||||
namespace xgboost {
|
||||
namespace tree {
|
||||
|
||||
class QuantileHistMock : public QuantileHistMaker {
|
||||
static double constexpr kEps = 1e-6;
|
||||
|
||||
struct BuilderMock : public QuantileHistMaker::Builder {
|
||||
using RealImpl = QuantileHistMaker::Builder;
|
||||
|
||||
BuilderMock(const TrainParam& param,
|
||||
std::unique_ptr<TreeUpdater> pruner,
|
||||
std::unique_ptr<SplitEvaluator> spliteval)
|
||||
: RealImpl(param, std::move(pruner), std::move(spliteval)) {}
|
||||
|
||||
public:
|
||||
void TestInitData(const GHistIndexMatrix& gmat,
|
||||
const std::vector<GradientPair>& gpair,
|
||||
const DMatrix& fmat,
|
||||
const RegTree& tree) {
|
||||
RealImpl::InitData(gmat, gpair, fmat, tree);
|
||||
ASSERT_EQ(data_layout_, kSparseData);
|
||||
}
|
||||
|
||||
void TestBuildHist(int nid,
|
||||
const GHistIndexMatrix& gmat,
|
||||
const DMatrix& fmat,
|
||||
const RegTree& tree) {
|
||||
std::vector<GradientPair> gpair =
|
||||
{ {0.23f, 0.24f}, {0.24f, 0.25f}, {0.26f, 0.27f}, {0.27f, 0.28f},
|
||||
{0.27f, 0.29f}, {0.37f, 0.39f}, {0.47f, 0.49f}, {0.57f, 0.59f} };
|
||||
RealImpl::InitData(gmat, gpair, fmat, tree);
|
||||
GHistIndexBlockMatrix quantile_index_block;
|
||||
hist_.AddHistRow(nid);
|
||||
BuildHist(gpair, row_set_collection_[nid],
|
||||
gmat, quantile_index_block, hist_[nid]);
|
||||
std::vector<GradientPairPrecise> solution {
|
||||
{0.27, 0.29}, {0.27, 0.29}, {0.47, 0.49},
|
||||
{0.27, 0.29}, {0.57, 0.59}, {0.26, 0.27},
|
||||
{0.37, 0.39}, {0.23, 0.24}, {0.37, 0.39},
|
||||
{0.27, 0.28}, {0.27, 0.29}, {0.37, 0.39},
|
||||
{0.26, 0.27}, {0.23, 0.24}, {0.57, 0.59},
|
||||
{0.47, 0.49}, {0.47, 0.49}, {0.37, 0.39},
|
||||
{0.26, 0.27}, {0.23, 0.24}, {0.27, 0.28},
|
||||
{0.57, 0.59}, {0.23, 0.24}, {0.47, 0.49}};
|
||||
|
||||
for (size_t i = 0; i < hist_[nid].size; ++i) {
|
||||
GradientPairPrecise sol = solution[i];
|
||||
ASSERT_NEAR(sol.GetGrad(), hist_[nid].begin[i].sum_grad, kEps);
|
||||
ASSERT_NEAR(sol.GetHess(), hist_[nid].begin[i].sum_hess, kEps);
|
||||
}
|
||||
}
|
||||
|
||||
void TestEvaluateSplit(const GHistIndexBlockMatrix& quantile_index_block,
|
||||
const RegTree& tree) {
|
||||
std::vector<GradientPair> row_gpairs =
|
||||
{ {0.23f, 0.24f}, {0.24f, 0.25f}, {0.26f, 0.27f}, {0.27f, 0.28f},
|
||||
{0.27f, 0.29f}, {0.37f, 0.39f}, {0.47f, 0.49f}, {0.57f, 0.59f} };
|
||||
size_t constexpr max_bins = 4;
|
||||
auto dmat = CreateDMatrix(n_rows, n_cols, 0, 3); // dense
|
||||
|
||||
common::GHistIndexMatrix gmat;
|
||||
gmat.Init((*dmat).get(), max_bins);
|
||||
|
||||
RealImpl::InitData(gmat, row_gpairs, *(*dmat), tree);
|
||||
hist_.AddHistRow(0);
|
||||
|
||||
BuildHist(row_gpairs, row_set_collection_[0],
|
||||
gmat, quantile_index_block, hist_[0]);
|
||||
|
||||
RealImpl::InitNewNode(0, gmat, row_gpairs, *(*dmat), tree);
|
||||
// Manipulate the root_gain so that I don't have to invent an actual
|
||||
// split. Yes, I'm cheating.
|
||||
snode_[0].root_gain = 0.8;
|
||||
RealImpl::EvaluateSplit(0, gmat, hist_, *(*dmat), tree);
|
||||
|
||||
ASSERT_NEAR(snode_.at(0).best.loss_chg, 0.7128048, kEps);
|
||||
ASSERT_EQ(snode_.at(0).best.SplitIndex(), 10);
|
||||
ASSERT_NEAR(snode_.at(0).best.split_value, 0.182258, kEps);
|
||||
|
||||
delete dmat;
|
||||
}
|
||||
};
|
||||
|
||||
int static constexpr n_rows = 8, n_cols = 16;
|
||||
std::shared_ptr<xgboost::DMatrix> *dmat;
|
||||
const std::vector<std::pair<std::string, std::string> > cfg;
|
||||
std::shared_ptr<BuilderMock> builder_;
|
||||
|
||||
public:
|
||||
explicit QuantileHistMock(
|
||||
const std::vector<std::pair<std::string, std::string> >& args) :
|
||||
cfg{args} {
|
||||
QuantileHistMaker::Init(args);
|
||||
builder_.reset(
|
||||
new BuilderMock(
|
||||
param_,
|
||||
std::move(pruner_),
|
||||
std::unique_ptr<SplitEvaluator>(spliteval_->GetHostClone())));
|
||||
dmat = CreateDMatrix(n_rows, n_cols, 0.8, 3);
|
||||
}
|
||||
~QuantileHistMock() { delete dmat; }
|
||||
|
||||
static size_t GetNumColumns() { return n_cols; }
|
||||
|
||||
void TestInitData() {
|
||||
size_t constexpr max_bins = 4;
|
||||
common::GHistIndexMatrix gmat;
|
||||
gmat.Init((*dmat).get(), max_bins);
|
||||
|
||||
RegTree tree = RegTree();
|
||||
tree.InitModel();
|
||||
tree.param.InitAllowUnknown(cfg);
|
||||
|
||||
std::vector<GradientPair> gpair =
|
||||
{ {0.23f, 0.24f}, {0.23f, 0.24f}, {0.23f, 0.24f}, {0.23f, 0.24f},
|
||||
{0.27f, 0.29f}, {0.27f, 0.29f}, {0.27f, 0.29f}, {0.27f, 0.29f} };
|
||||
|
||||
builder_->TestInitData(gmat, gpair, *(*dmat), tree);
|
||||
}
|
||||
|
||||
void TestBuildHist() {
|
||||
RegTree tree = RegTree();
|
||||
tree.InitModel();
|
||||
tree.param.InitAllowUnknown(cfg);
|
||||
|
||||
size_t constexpr max_bins = 4;
|
||||
common::GHistIndexMatrix gmat;
|
||||
gmat.Init((*dmat).get(), max_bins);
|
||||
|
||||
builder_->TestBuildHist(0, gmat, *(*dmat).get(), tree);
|
||||
}
|
||||
|
||||
void TestEvaluateSplit() {
|
||||
RegTree tree = RegTree();
|
||||
tree.InitModel();
|
||||
tree.param.InitAllowUnknown(cfg);
|
||||
|
||||
builder_->TestEvaluateSplit(gmatb_, tree);
|
||||
}
|
||||
};
|
||||
|
||||
// Builds a QuantileHistMock configured only with num_feature and runs its
// InitData check.
TEST(Updater, QuantileHist_InitData) {
  using Args = std::vector<std::pair<std::string, std::string>>;

  Args args;
  args.emplace_back("num_feature",
                    std::to_string(QuantileHistMock::GetNumColumns()));

  QuantileHistMock mock(args);
  mock.TestInitData();
}
|
||||
|
||||
// Builds a QuantileHistMock with feature grouping switched off and runs its
// histogram-building check.
TEST(Updater, QuantileHist_BuildHist) {
  using Args = std::vector<std::pair<std::string, std::string>>;

  Args args;
  args.emplace_back("num_feature",
                    std::to_string(QuantileHistMock::GetNumColumns()));
  // Don't enable feature grouping
  args.emplace_back("enable_feature_grouping", std::to_string(0));

  QuantileHistMock mock(args);
  mock.TestBuildHist();
}
|
||||
|
||||
// Builds a QuantileHistMock that uses the "elastic_net" split evaluator and
// runs its split-evaluation check.
TEST(Updater, QuantileHist_EvalSplits) {
  using Args = std::vector<std::pair<std::string, std::string>>;

  Args args;
  args.emplace_back("num_feature",
                    std::to_string(QuantileHistMock::GetNumColumns()));
  args.emplace_back("split_evaluator", "elastic_net");

  QuantileHistMock mock(args);
  mock.TestEvaluateSplit();
}
|
||||
|
||||
} // namespace tree
|
||||
} // namespace xgboost
|
||||
57
tests/cpp/tree/test_refresh.cc
Normal file
57
tests/cpp/tree/test_refresh.cc
Normal file
@@ -0,0 +1,57 @@
|
||||
/*!
|
||||
* Copyright 2018 by Contributors
|
||||
*/
|
||||
#include "../helpers.h"
|
||||
#include "../../../src/common/host_device_vector.h"
|
||||
#include <xgboost/tree_updater.h>
|
||||
#include <gtest/gtest.h>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <memory>
|
||||
|
||||
namespace xgboost {
|
||||
namespace tree {
|
||||
|
||||
// Exercises the "refresh" tree updater on a tree consisting of a root split
// (feature 2, threshold 0.2) with two leaf children, then compares the
// refreshed right-leaf value and the per-node loss_chg statistics against
// precomputed values.
TEST(Updater, Refresh) {
  int constexpr n_rows = 8, n_cols = 16;

  HostDeviceVector<GradientPair> gpair =
      { {0.23f, 0.24f}, {0.23f, 0.24f}, {0.23f, 0.24f}, {0.23f, 0.24f},
        {0.27f, 0.29f}, {0.27f, 0.29f}, {0.27f, 0.29f}, {0.27f, 0.29f} };
  // CreateDMatrix hands back a heap-allocated shared_ptr.  Owning it through
  // a unique_ptr releases it on every exit path, including the early return
  // performed by a failing ASSERT_*.
  std::unique_ptr<std::shared_ptr<DMatrix>> dmat(
      CreateDMatrix(n_rows, n_cols, 0.4, 3));
  std::vector<std::pair<std::string, std::string>> cfg {
      {"reg_alpha", "0.0"},
      {"num_feature", std::to_string(n_cols)},
      {"reg_lambda", "1"}};

  RegTree tree = RegTree();
  tree.InitModel();
  tree.param.InitAllowUnknown(cfg);
  std::vector<RegTree*> trees {&tree};
  std::unique_ptr<TreeUpdater> refresher(TreeUpdater::Create("refresh"));

  // Root with two leaves, split on feature 2 at 0.2.
  tree.AddChilds(0);
  int cleft = tree[0].LeftChild();
  int cright = tree[0].RightChild();
  tree[cleft].SetLeaf(0.2f, 0);
  tree[cright].SetLeaf(0.8f, 0);
  tree[0].SetSplit(2, 0.2f);

  tree.Stat(cleft).base_weight = 1.2;
  tree.Stat(cright).base_weight = 1.3;

  refresher->Init(cfg);
  refresher->Update(&gpair, dmat->get(), trees);

  bst_float constexpr kEps = 1e-6;
  ASSERT_NEAR(-0.183392, tree[cright].LeafValue(), kEps);
  ASSERT_NEAR(-0.224489, tree.Stat(0).loss_chg, kEps);
  ASSERT_NEAR(0, tree.Stat(cleft).loss_chg, kEps);
  ASSERT_NEAR(0, tree.Stat(1).loss_chg, kEps);
  ASSERT_NEAR(0, tree.Stat(2).loss_chg, kEps);
}
|
||||
|
||||
} // namespace tree
|
||||
} // namespace xgboost
|
||||
Reference in New Issue
Block a user