[jvm-package] remove the coalesce in barrier mode (#7846)

Bobby Wang 2022-04-27 23:34:22 +08:00 committed by GitHub
parent fdf533f2b9
commit 686caad40c
3 changed files with 8 additions and 17 deletions


@@ -69,7 +69,7 @@ public class BoosterTest {
         .hasHeader().build();
     int maxBin = 16;
-    int round = 100;
+    int round = 10;
     //set params
     Map<String, Object> paramMap = new HashMap<String, Object>() {
       {


@@ -407,15 +407,10 @@ object GpuPreXGBoost extends PreXGBoostProvider {
   }
 
   private def repartitionInputData(dataFrame: DataFrame, nWorkers: Int): DataFrame = {
-    // We can't check dataFrame.rdd.getNumPartitions == nWorkers here, since dataFrame.rdd is
-    // a lazy variable. If we call it here, we will not directly extract RDD[Table] again,
-    // instead, we will involve Columnar -> Row -> Columnar and decrease the performance
-    if (nWorkers == 1) {
-      dataFrame.coalesce(1)
-    } else {
-      dataFrame.repartition(nWorkers)
-    }
+    // We can't involve any coalesce operation here, since barrier mode checks the
+    // RDD chain pattern, which does not allow coalesce.
+    dataFrame.repartition(nWorkers)
   }
 
   private def repartitionForGroup(
     groupName: String,
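
Why the coalesce(1) branch had to go: Spark's barrier execution mode fails fast on RDD chains whose ancestor RDDs have a different partition count from the resulting RDD (SPARK-24820/SPARK-24821), and a shuffle-free coalesce() is exactly such a chain. Below is a minimal Scala sketch, separate from this patch, demonstrating the constraint; the session setup and toy data are assumptions, not code from this repo.

import org.apache.spark.rdd.RDD
import org.apache.spark.sql.SparkSession

// Minimal sketch: barrier mode accepts a shuffle-backed repartition() but
// rejects a narrow coalesce() inside the same barrier stage.
object BarrierCoalesceSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .master("local[2]")
      .appName("barrier-coalesce-sketch")
      .getOrCreate()
    val rdd: RDD[Int] = spark.sparkContext.parallelize(1 to 100, numSlices = 4)

    // Accepted: repartition() inserts a shuffle, so the barrier stage starts
    // at the shuffle boundary and every RDD in it has the same partition count.
    rdd.repartition(2).barrier().mapPartitions(iter => iter).collect()

    // Rejected at job submission (SPARK-24820/24821): coalesce(1) narrows
    // four partitions into one within the barrier stage.
    // rdd.coalesce(1).barrier().mapPartitions(iter => iter).collect()

    spark.stop()
  }
}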


@@ -39,13 +39,8 @@ trait GpuTestSuite extends FunSuite with TmpFolderSuite {
 
   def enableCsvConf(): SparkConf = {
     new SparkConf()
-      .set(RapidsConf.ENABLE_READ_CSV_DATES.key, "true")
-      .set(RapidsConf.ENABLE_READ_CSV_BYTES.key, "true")
-      .set(RapidsConf.ENABLE_READ_CSV_SHORTS.key, "true")
-      .set(RapidsConf.ENABLE_READ_CSV_INTEGERS.key, "true")
-      .set(RapidsConf.ENABLE_READ_CSV_LONGS.key, "true")
-      .set(RapidsConf.ENABLE_READ_CSV_FLOATS.key, "true")
-      .set(RapidsConf.ENABLE_READ_CSV_DOUBLES.key, "true")
+      .set("spark.rapids.sql.csv.read.float.enabled", "true")
+      .set("spark.rapids.sql.csv.read.double.enabled", "true")
   }
 
   def withGpuSparkSession[U](conf: SparkConf = new SparkConf())(f: SparkSession => U): U = {
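
For context, the slimmed-down enableCsvConf() is typically consumed together with withGpuSparkSession() from the same trait. The test body below is a hypothetical illustration, not code from this commit, and the CSV path is a placeholder.

// Hypothetical FunSuite test using the two helpers above.
test("RAPIDS reads float/double CSV columns") {
  withGpuSparkSession(enableCsvConf()) { spark =>
    val df = spark.read.option("header", "true").csv("/tmp/sample.csv")
    assert(df.count() > 0)
  }
}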
@@ -246,12 +241,13 @@ object SparkSessionHolder extends Logging {
     Locale.setDefault(Locale.US)
 
     val builder = SparkSession.builder()
-      .master("local[1]")
+      .master("local[2]")
       .config("spark.sql.adaptive.enabled", "false")
       .config("spark.rapids.sql.enabled", "false")
       .config("spark.rapids.sql.test.enabled", "false")
       .config("spark.plugins", "com.nvidia.spark.SQLPlugin")
       .config("spark.rapids.memory.gpu.pooling.enabled", "false") // Disable RMM for unit tests.
+      .config("spark.sql.files.maxPartitionBytes", "1000")
       .appName("XGBoost4j-Spark-Gpu unit test")
     builder.getOrCreate()
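
The two session tweaks work together: local[2] gives the test session two cores, and capping spark.sql.files.maxPartitionBytes at 1000 bytes makes Spark split even tiny test files into many input partitions, so the repartition(nWorkers) path above is genuinely exercised rather than the input already arriving as a single partition. A rough way to observe that effect is sketched below; the file path is a placeholder, not part of this commit.

import org.apache.spark.sql.SparkSession

// Sketch: a ~1 KB split size turns a small CSV into many input partitions.
object MaxPartitionBytesSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .master("local[2]")
      .config("spark.sql.files.maxPartitionBytes", "1000")
      .getOrCreate()

    // "/tmp/train.csv" is a placeholder for any multi-kilobyte file.
    val df = spark.read.option("header", "true").csv("/tmp/train.csv")
    println(s"input partitions: ${df.rdd.getNumPartitions}")

    spark.stop()
  }
}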