xgboost/tests/cpp/common/test_random.cc
Andy Adinets 42bf90eb8f Column sampling at individual nodes (splits). (#3971)
* Column sampling at individual nodes (splits).

* Documented colsample_bynode parameter.

- also updated documentation for colsample_by* parameters

* Updated documentation.

* GetFeatureSet() returns shared pointer to std::vector.

* Sync sampled columns across multiple processes.
2018-12-14 22:37:35 +08:00

50 lines
1.2 KiB
C++

#include "../../../src/common/random.h"
#include "../helpers.h"
#include "gtest/gtest.h"
namespace xgboost {
namespace common {
TEST(ColumnSampler, Test) {
int n = 128;
ColumnSampler cs;
// No node sampling
cs.Init(n, 1.0f, 0.5f, 0.5f);
auto set0 = *cs.GetFeatureSet(0);
ASSERT_EQ(set0.size(), 32);
auto set1 = *cs.GetFeatureSet(0);
ASSERT_EQ(set0, set1);
auto set2 = *cs.GetFeatureSet(1);
ASSERT_NE(set1, set2);
ASSERT_EQ(set2.size(), 32);
// Node sampling
cs.Init(n, 0.5f, 1.0f, 0.5f);
auto set3 = *cs.GetFeatureSet(0);
ASSERT_EQ(set3.size(), 32);
auto set4 = *cs.GetFeatureSet(0);
ASSERT_NE(set3, set4);
ASSERT_EQ(set4.size(), 32);
// No level or node sampling, should be the same at different depth
cs.Init(n, 1.0f, 1.0f, 0.5f);
ASSERT_EQ(*cs.GetFeatureSet(0), *cs.GetFeatureSet(1));
cs.Init(n, 1.0f, 1.0f, 1.0f);
auto set5 = *cs.GetFeatureSet(0);
ASSERT_EQ(set5.size(), n);
cs.Init(n, 1.0f, 1.0f, 1.0f);
auto set6 = *cs.GetFeatureSet(0);
ASSERT_EQ(set5, set6);
// Should always be a minimum of one feature
cs.Init(n, 1e-16f, 1e-16f, 1e-16f);
ASSERT_EQ(cs.GetFeatureSet(0)->size(), 1);
}
} // namespace common
} // namespace xgboost