[jvm-package] remove the coalesce in barrier mode (#7846)

This commit is contained in:
Bobby Wang
2022-04-27 23:34:22 +08:00
committed by GitHub
parent fdf533f2b9
commit 686caad40c
3 changed files with 8 additions and 17 deletions

View File

@@ -407,14 +407,9 @@ object GpuPreXGBoost extends PreXGBoostProvider {
}
/**
 * Repartitions the input data into `nWorkers` partitions.
 *
 * @param dataFrame the input data to redistribute
 * @param nWorkers  the number of partitions to produce
 * @return `dataFrame` repartitioned into `nWorkers` partitions
 */
private def repartitionInputData(dataFrame: DataFrame, nWorkers: Int): DataFrame = {
  // We can't check dataFrame.rdd.getNumPartitions == nWorkers here, since dataFrame.rdd is
  // a lazy variable. If we call it here, we will not directly extract RDD[Table] again,
  // instead, we will involve Columnar -> Row -> Columnar and decrease the performance.
  //
  // We also can't use any coalesce operation here (not even when nWorkers == 1), since
  // barrier mode checks the RDD patterns and does not allow coalesce. Always repartition.
  dataFrame.repartition(nWorkers)
}
private def repartitionForGroup(