Reduce time for some multi-gpu tests (#8288)
* Faster dask tests
* Reuse AllReducer objects in tests.
* Faster boost from prediction tests.
* Use rmm dask fixture.
* Speed up dask demo.
* mypy
* Format with black.
* mypy
* Clang-tidy

Co-authored-by: Hyunsu Philip Cho <chohyu01@cs.washington.edu>
This commit is contained in:
@@ -349,6 +349,9 @@ TEST(GPUQuantile, AllReduceBasic) {
|
||||
return;
|
||||
}
|
||||
|
||||
auto reducer = std::make_shared<dh::AllReducer>();
|
||||
reducer->Init(0);
|
||||
|
||||
constexpr size_t kRows = 1000, kCols = 100;
|
||||
RunWithSeedsAndBins(kRows, [=](int32_t seed, size_t n_bins, MetaInfo const& info) {
|
||||
// Set up single node version;
|
||||
@@ -378,12 +381,12 @@ TEST(GPUQuantile, AllReduceBasic) {
|
||||
}
|
||||
sketch_on_single_node.Unique();
|
||||
TestQuantileElemRank(0, sketch_on_single_node.Data(),
|
||||
sketch_on_single_node.ColumnsPtr());
|
||||
sketch_on_single_node.ColumnsPtr(), true);
|
||||
|
||||
// Set up distributed version. We rely on using rank as seed to generate
|
||||
// the exact same copy of data.
|
||||
auto rank = rabit::GetRank();
|
||||
SketchContainer sketch_distributed(ft, n_bins, kCols, kRows, 0);
|
||||
SketchContainer sketch_distributed(ft, n_bins, kCols, kRows, 0, reducer);
|
||||
HostDeviceVector<float> storage;
|
||||
std::string interface_str = RandomDataGenerator{kRows, kCols, 0}
|
||||
.Device(0)
|
||||
@@ -402,7 +405,7 @@ TEST(GPUQuantile, AllReduceBasic) {
|
||||
sketch_on_single_node.Data().size());
|
||||
|
||||
TestQuantileElemRank(0, sketch_distributed.Data(),
|
||||
sketch_distributed.ColumnsPtr());
|
||||
sketch_distributed.ColumnsPtr(), true);
|
||||
|
||||
std::vector<SketchEntry> single_node_data(
|
||||
sketch_on_single_node.Data().size());
|
||||
@@ -432,13 +435,15 @@ TEST(GPUQuantile, SameOnAllWorkers) {
|
||||
} else {
|
||||
return;
|
||||
}
|
||||
auto reducer = std::make_shared<dh::AllReducer>();
|
||||
reducer->Init(0);
|
||||
|
||||
constexpr size_t kRows = 1000, kCols = 100;
|
||||
RunWithSeedsAndBins(kRows, [=](int32_t seed, size_t n_bins,
|
||||
MetaInfo const &info) {
|
||||
auto rank = rabit::GetRank();
|
||||
HostDeviceVector<FeatureType> ft;
|
||||
SketchContainer sketch_distributed(ft, n_bins, kCols, kRows, 0);
|
||||
SketchContainer sketch_distributed(ft, n_bins, kCols, kRows, 0, reducer);
|
||||
HostDeviceVector<float> storage;
|
||||
std::string interface_str = RandomDataGenerator{kRows, kCols, 0}
|
||||
.Device(0)
|
||||
@@ -450,7 +455,7 @@ TEST(GPUQuantile, SameOnAllWorkers) {
|
||||
&sketch_distributed);
|
||||
sketch_distributed.AllReduce();
|
||||
sketch_distributed.Unique();
|
||||
TestQuantileElemRank(0, sketch_distributed.Data(), sketch_distributed.ColumnsPtr());
|
||||
TestQuantileElemRank(0, sketch_distributed.Data(), sketch_distributed.ColumnsPtr(), true);
|
||||
|
||||
// Test for all workers having the same sketch.
|
||||
size_t n_data = sketch_distributed.Data().size();
|
||||
@@ -467,12 +472,9 @@ TEST(GPUQuantile, SameOnAllWorkers) {
|
||||
thrust::copy(thrust::device, local_data.data(),
|
||||
local_data.data() + local_data.size(),
|
||||
all_workers.begin() + local_data.size() * rank);
|
||||
dh::AllReducer reducer;
|
||||
reducer.Init(0);
|
||||
|
||||
reducer.AllReduceSum(all_workers.data().get(), all_workers.data().get(),
|
||||
reducer->AllReduceSum(all_workers.data().get(), all_workers.data().get(),
|
||||
all_workers.size());
|
||||
reducer.Synchronize();
|
||||
reducer->Synchronize();
|
||||
|
||||
auto base_line = dh::ToSpan(all_workers).subspan(0, size_as_float);
|
||||
std::vector<float> h_base_line(base_line.size());
|
||||
|
||||
@@ -37,12 +37,12 @@ inline void InitRabitContext(std::string msg, int32_t n_workers) {
|
||||
}
|
||||
|
||||
template <typename Fn> void RunWithSeedsAndBins(size_t rows, Fn fn) {
|
||||
std::vector<int32_t> seeds(4);
|
||||
std::vector<int32_t> seeds(2);
|
||||
SimpleLCG lcg;
|
||||
SimpleRealUniformDistribution<float> dist(3, 1000);
|
||||
std::generate(seeds.begin(), seeds.end(), [&](){ return dist(&lcg); });
|
||||
|
||||
std::vector<size_t> bins(8);
|
||||
std::vector<size_t> bins(2);
|
||||
for (size_t i = 0; i < bins.size() - 1; ++i) {
|
||||
bins[i] = i * 35 + 2;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user