Column sampling at individual nodes (splits). (#3971)
* Column sampling at individual nodes (splits).
* Documented the colsample_bynode parameter.
  - Also updated the documentation for the colsample_by* parameters.
* Updated documentation.
* GetFeatureSet() returns a shared pointer to std::vector.
* Sync sampled columns across multiple processes.
This commit is contained in:
committed by
Jiaming Yuan
parent
e0a279114e
commit
42bf90eb8f
@@ -5,33 +5,45 @@
|
||||
namespace xgboost {
|
||||
namespace common {
|
||||
TEST(ColumnSampler, Test) {
|
||||
int n = 100;
|
||||
int n = 128;
|
||||
ColumnSampler cs;
|
||||
cs.Init(n, 0.5f, 0.5f);
|
||||
auto &set0 = cs.GetFeatureSet(0).HostVector();
|
||||
ASSERT_EQ(set0.size(), 25);
|
||||
|
||||
auto &set1 = cs.GetFeatureSet(0).HostVector();
|
||||
// No node sampling
|
||||
cs.Init(n, 1.0f, 0.5f, 0.5f);
|
||||
auto set0 = *cs.GetFeatureSet(0);
|
||||
ASSERT_EQ(set0.size(), 32);
|
||||
|
||||
auto set1 = *cs.GetFeatureSet(0);
|
||||
ASSERT_EQ(set0, set1);
|
||||
|
||||
auto &set2 = cs.GetFeatureSet(1).HostVector();
|
||||
auto set2 = *cs.GetFeatureSet(1);
|
||||
ASSERT_NE(set1, set2);
|
||||
ASSERT_EQ(set2.size(), 25);
|
||||
ASSERT_EQ(set2.size(), 32);
|
||||
|
||||
// No level sampling, should be the same at different depth
|
||||
cs.Init(n, 1.0f, 0.5f);
|
||||
ASSERT_EQ(cs.GetFeatureSet(0).HostVector(), cs.GetFeatureSet(1).HostVector());
|
||||
// Node sampling
|
||||
cs.Init(n, 0.5f, 1.0f, 0.5f);
|
||||
auto set3 = *cs.GetFeatureSet(0);
|
||||
ASSERT_EQ(set3.size(), 32);
|
||||
|
||||
cs.Init(n, 1.0f, 1.0f);
|
||||
auto &set3 = cs.GetFeatureSet(0).HostVector();
|
||||
ASSERT_EQ(set3.size(), n);
|
||||
cs.Init(n, 1.0f, 1.0f);
|
||||
auto &set4 = cs.GetFeatureSet(0).HostVector();
|
||||
ASSERT_EQ(set3, set4);
|
||||
auto set4 = *cs.GetFeatureSet(0);
|
||||
ASSERT_NE(set3, set4);
|
||||
ASSERT_EQ(set4.size(), 32);
|
||||
|
||||
// No level or node sampling, should be the same at different depth
|
||||
cs.Init(n, 1.0f, 1.0f, 0.5f);
|
||||
ASSERT_EQ(*cs.GetFeatureSet(0), *cs.GetFeatureSet(1));
|
||||
|
||||
cs.Init(n, 1.0f, 1.0f, 1.0f);
|
||||
auto set5 = *cs.GetFeatureSet(0);
|
||||
ASSERT_EQ(set5.size(), n);
|
||||
cs.Init(n, 1.0f, 1.0f, 1.0f);
|
||||
auto set6 = *cs.GetFeatureSet(0);
|
||||
ASSERT_EQ(set5, set6);
|
||||
|
||||
// Should always be a minimum of one feature
|
||||
cs.Init(n, 1e-16f, 1e-16f);
|
||||
ASSERT_EQ(cs.GetFeatureSet(0).HostVector().size(), 1);
|
||||
cs.Init(n, 1e-16f, 1e-16f, 1e-16f);
|
||||
ASSERT_EQ(cs.GetFeatureSet(0)->size(), 1);
|
||||
|
||||
}
|
||||
} // namespace common
|
||||
} // namespace xgboost
|
||||
|
||||
@@ -227,6 +227,7 @@ TEST(GpuHist, EvaluateSplits) {
|
||||
TrainParam param;
|
||||
param.max_depth = 1;
|
||||
param.n_gpus = 1;
|
||||
param.colsample_bynode = 1;
|
||||
param.colsample_bylevel = 1;
|
||||
param.colsample_bytree = 1;
|
||||
param.min_child_weight = 0.01;
|
||||
@@ -284,6 +285,7 @@ TEST(GpuHist, EvaluateSplits) {
|
||||
hist_maker.param_ = param;
|
||||
hist_maker.shards_.push_back(std::move(shard));
|
||||
hist_maker.column_sampler_.Init(n_cols,
|
||||
param.colsample_bynode,
|
||||
param.colsample_bylevel,
|
||||
param.colsample_bytree,
|
||||
false);
|
||||
|
||||
Reference in New Issue
Block a user