Column sampling at individual nodes (splits). (#3971)

* Column sampling at individual nodes (splits).

* Documented colsample_bynode parameter.

- also updated documentation for colsample_by* parameters

* Updated documentation.

* GetFeatureSet() returns shared pointer to std::vector.

* Sync sampled columns across multiple processes.
This commit is contained in:
Andy Adinets
2018-12-14 15:37:35 +01:00
committed by Jiaming Yuan
parent e0a279114e
commit 42bf90eb8f
8 changed files with 140 additions and 80 deletions

View File

@@ -5,33 +5,45 @@
namespace xgboost {
namespace common {
// Unit test for ColumnSampler: checks the size and the repeatability of the
// feature sets returned by GetFeatureSet() for several sampling-rate settings.
//
// NOTE(review): this listing looks like a rendered diff hunk whose +/- markers
// were lost, so pre-change and post-change lines are interleaved (hence the
// duplicate declarations of n, set0..set4). Presumably the Init() calls taking
// three rates together with `*GetFeatureSet(...)` / `->size()` are the new
// version, and the Init() calls taking two rates together with
// `.HostVector()` are the old one -- confirm against the repository.
TEST(ColumnSampler, Test) {
// Total number of features handed to the sampler (two values: see NOTE above).
int n = 100;
int n = 128;
ColumnSampler cs;
// Two-rate form; the expected size 25 equals 100 * 0.5 * 0.5.
cs.Init(n, 0.5f, 0.5f);
auto &set0 = cs.GetFeatureSet(0).HostVector();
ASSERT_EQ(set0.size(), 25);
auto &set1 = cs.GetFeatureSet(0).HostVector();
// No node sampling
// Three-rate form; judging by the section comments the rates are presumably
// ordered (by node, by level, by tree) -- TODO confirm against Init()'s
// declaration. The expected size 32 equals 128 * 0.5 * 0.5.
cs.Init(n, 1.0f, 0.5f, 0.5f);
auto set0 = *cs.GetFeatureSet(0);
ASSERT_EQ(set0.size(), 32);
// A second request with the same argument (0) must yield an identical set.
auto set1 = *cs.GetFeatureSet(0);
ASSERT_EQ(set0, set1);
// A request with a different argument (1, presumably the tree depth) must
// yield a different set of the same size.
auto &set2 = cs.GetFeatureSet(1).HostVector();
auto set2 = *cs.GetFeatureSet(1);
ASSERT_NE(set1, set2);
ASSERT_EQ(set2.size(), 25);
ASSERT_EQ(set2.size(), 32);
// No level sampling, should be the same at different depth
cs.Init(n, 1.0f, 0.5f);
ASSERT_EQ(cs.GetFeatureSet(0).HostVector(), cs.GetFeatureSet(1).HostVector());
// Node sampling
// With a first rate of 0.5 the expected size is still 32 (128 * 0.5 * 0.5),
// but two requests with the same argument are expected to differ
// (see ASSERT_NE(set3, set4) below).
cs.Init(n, 0.5f, 1.0f, 0.5f);
auto set3 = *cs.GetFeatureSet(0);
ASSERT_EQ(set3.size(), 32);
// Two-rate form with both rates at 1.0: the set contains all n features and
// is identical after re-initialising with the same rates.
cs.Init(n, 1.0f, 1.0f);
auto &set3 = cs.GetFeatureSet(0).HostVector();
ASSERT_EQ(set3.size(), n);
cs.Init(n, 1.0f, 1.0f);
auto &set4 = cs.GetFeatureSet(0).HostVector();
ASSERT_EQ(set3, set4);
auto set4 = *cs.GetFeatureSet(0);
ASSERT_NE(set3, set4);
ASSERT_EQ(set4.size(), 32);
// No level or node sampling, should be the same at different depth
cs.Init(n, 1.0f, 1.0f, 0.5f);
ASSERT_EQ(*cs.GetFeatureSet(0), *cs.GetFeatureSet(1));
// All three rates at 1.0: the set contains all n features and is identical
// after re-initialising with the same rates.
cs.Init(n, 1.0f, 1.0f, 1.0f);
auto set5 = *cs.GetFeatureSet(0);
ASSERT_EQ(set5.size(), n);
cs.Init(n, 1.0f, 1.0f, 1.0f);
auto set6 = *cs.GetFeatureSet(0);
ASSERT_EQ(set5, set6);
// Should always be a minimum of one feature
// (rates of 1e-16 would otherwise round the sample size down to zero).
cs.Init(n, 1e-16f, 1e-16f);
ASSERT_EQ(cs.GetFeatureSet(0).HostVector().size(), 1);
cs.Init(n, 1e-16f, 1e-16f, 1e-16f);
ASSERT_EQ(cs.GetFeatureSet(0)->size(), 1);
}
} // namespace common
} // namespace xgboost