Support the distributed CPU environment for categorical data. (#7575)
* Add support for categorical data in sketch allreduce.
* Share tests between CPU and GPU.
This commit is contained in:
@@ -58,10 +58,17 @@ void TestDistributedQuantile(size_t rows, size_t cols) {
|
||||
// Generate cuts for distributed environment.
|
||||
auto sparsity = 0.5f;
|
||||
auto rank = rabit::GetRank();
|
||||
std::vector<FeatureType> ft(cols);
|
||||
for (size_t i = 0; i < ft.size(); ++i) {
|
||||
ft[i] = (i % 2 == 0) ? FeatureType::kNumerical : FeatureType::kCategorical;
|
||||
}
|
||||
|
||||
auto m = RandomDataGenerator{rows, cols, sparsity}
|
||||
.Seed(rank)
|
||||
.Lower(.0f)
|
||||
.Upper(1.0f)
|
||||
.Type(ft)
|
||||
.MaxCategory(13)
|
||||
.GenerateDMatrix();
|
||||
|
||||
std::vector<float> hessian(rows, 1.0);
|
||||
@@ -95,6 +102,8 @@ void TestDistributedQuantile(size_t rows, size_t cols) {
|
||||
for (auto rank = 0; rank < world; ++rank) {
|
||||
auto m = RandomDataGenerator{rows, cols, sparsity}
|
||||
.Seed(rank)
|
||||
.Type(ft)
|
||||
.MaxCategory(13)
|
||||
.Lower(.0f)
|
||||
.Upper(1.0f)
|
||||
.GenerateDMatrix();
|
||||
@@ -181,8 +190,15 @@ TEST(Quantile, SameOnAllWorkers) {
|
||||
kRows, [=](int32_t seed, size_t n_bins, MetaInfo const &info) {
|
||||
auto rank = rabit::GetRank();
|
||||
HostDeviceVector<float> storage;
|
||||
std::vector<FeatureType> ft(kCols);
|
||||
for (size_t i = 0; i < ft.size(); ++i) {
|
||||
ft[i] = (i % 2 == 0) ? FeatureType::kNumerical : FeatureType::kCategorical;
|
||||
}
|
||||
|
||||
auto m = RandomDataGenerator{kRows, kCols, 0}
|
||||
.Device(0)
|
||||
.Type(ft)
|
||||
.MaxCategory(17)
|
||||
.Seed(rank + seed)
|
||||
.GenerateDMatrix();
|
||||
auto cuts = SketchOnDMatrix(m.get(), n_bins);
|
||||
|
||||
Reference in New Issue
Block a user