From 4e12f3e1bcdf0163f907656876b2355087547895 Mon Sep 17 00:00:00 2001 From: Bobby Wang Date: Sat, 7 Jan 2023 18:59:17 +0800 Subject: [PATCH] [Breaking][jvm-packages] Bump rapids version to 22.12.0 (#8648) * [jvm-packages] Bump rapids version to 22.12.0 This PR bumps the Spark version to 3.1.1 and the rapids version to 22.12.0. As a result, the latest xgboost can no longer run with older rapids packages. --- jvm-packages/pom.xml | 6 ++--- .../scala/rapids/spark/GpuUtils.scala | 13 ++------- .../scala/rapids/spark/GpuTestSuite.scala | 7 +++-- .../scala/org/apache/spark/GpuTestUtils.scala | 27 +++++++++++++++++++ .../spark/ml/util/XGBoostSchemaUtils.scala | 3 +-- tests/buildkite/conftest.sh | 2 +- 6 files changed, 37 insertions(+), 21 deletions(-) create mode 100644 jvm-packages/xgboost4j-spark-gpu/src/test/scala/org/apache/spark/GpuTestUtils.scala diff --git a/jvm-packages/pom.xml b/jvm-packages/pom.xml index 6d4288b9a..f363122cb 100644 --- a/jvm-packages/pom.xml +++ b/jvm-packages/pom.xml @@ -34,15 +34,15 @@ 1.8 1.8 1.8.3 - 3.0.3 + 3.1.1 2.12.8 2.12 3.3.4 5 OFF OFF - 21.08.2 - 21.08.0 + 22.12.0 + 22.12.0 cuda11 diff --git a/jvm-packages/xgboost4j-spark-gpu/src/main/scala/ml/dmlc/xgboost4j/scala/rapids/spark/GpuUtils.scala b/jvm-packages/xgboost4j-spark-gpu/src/main/scala/ml/dmlc/xgboost4j/scala/rapids/spark/GpuUtils.scala index f5876fded..c88aefa4e 100644 --- a/jvm-packages/xgboost4j-spark-gpu/src/main/scala/ml/dmlc/xgboost4j/scala/rapids/spark/GpuUtils.scala +++ b/jvm-packages/xgboost4j-spark-gpu/src/main/scala/ml/dmlc/xgboost4j/scala/rapids/spark/GpuUtils.scala @@ -17,7 +17,7 @@ package ml.dmlc.xgboost4j.scala.rapids.spark import ai.rapids.cudf.Table -import com.nvidia.spark.rapids.{ColumnarRdd, GpuColumnVector} +import com.nvidia.spark.rapids.{ColumnarRdd, GpuColumnVectorUtils} import ml.dmlc.xgboost4j.scala.spark.util.Utils import org.apache.spark.rdd.RDD @@ -31,16 +31,7 @@ private[spark] object GpuUtils { def extractBatchToHost(table: Table, types: Array[DataType]): 
Array[ColumnVector] = { // spark-rapids has shimmed the GpuColumnVector from 22.10 - try { - val clazz = Utils.classForName("com.nvidia.spark.rapids.GpuColumnVectorUtils") - clazz.getDeclaredMethod("extractHostColumns", classOf[Table], classOf[Array[DataType]]) - .invoke(null, table, types).asInstanceOf[Array[ColumnVector]] - } catch { - case _: ClassNotFoundException => - // If it's older version, use the GpuColumnVector - GpuColumnVector.extractColumns(table, types).map(_.copyToHost()) - case e: Throwable => throw e - } + GpuColumnVectorUtils.extractHostColumns(table, types) } def toColumnarRdd(df: DataFrame): RDD[Table] = ColumnarRdd(df) diff --git a/jvm-packages/xgboost4j-spark-gpu/src/test/scala/ml/dmlc/xgboost4j/scala/rapids/spark/GpuTestSuite.scala b/jvm-packages/xgboost4j-spark-gpu/src/test/scala/ml/dmlc/xgboost4j/scala/rapids/spark/GpuTestSuite.scala index a8d7a81ed..175e00b39 100644 --- a/jvm-packages/xgboost4j-spark-gpu/src/test/scala/ml/dmlc/xgboost4j/scala/rapids/spark/GpuTestSuite.scala +++ b/jvm-packages/xgboost4j-spark-gpu/src/test/scala/ml/dmlc/xgboost4j/scala/rapids/spark/GpuTestSuite.scala @@ -1,5 +1,5 @@ /* - Copyright (c) 2021 by Contributors + Copyright (c) 2021-2023 by Contributors Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -22,11 +22,10 @@ import java.util.{Locale, TimeZone} import org.scalatest.{BeforeAndAfterAll, FunSuite} -import org.apache.spark.SparkConf +import org.apache.spark.{GpuTestUtils, SparkConf} import org.apache.spark.internal.Logging import org.apache.spark.network.util.JavaUtils import org.apache.spark.sql.{Row, SparkSession} -import org.apache.spark.sql.rapids.execution.TrampolineUtil trait GpuTestSuite extends FunSuite with TmpFolderSuite { import SparkSessionHolder.withSparkSession @@ -232,7 +231,7 @@ object SparkSessionHolder extends Logging { } private def createSparkSession(): SparkSession = { - TrampolineUtil.cleanupAnyExistingSession() + GpuTestUtils.cleanupAnyExistingSession() // Timezone is fixed to UTC to allow timestamps to work by default TimeZone.setDefault(TimeZone.getTimeZone("UTC")) diff --git a/jvm-packages/xgboost4j-spark-gpu/src/test/scala/org/apache/spark/GpuTestUtils.scala b/jvm-packages/xgboost4j-spark-gpu/src/test/scala/org/apache/spark/GpuTestUtils.scala new file mode 100644 index 000000000..a8d7957bf --- /dev/null +++ b/jvm-packages/xgboost4j-spark-gpu/src/test/scala/org/apache/spark/GpuTestUtils.scala @@ -0,0 +1,27 @@ +/* + Copyright (c) 2023 by Contributors + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+ */ + +package org.apache.spark + +import org.apache.spark.sql.SparkSession + +object GpuTestUtils { + + def cleanupAnyExistingSession(): Unit = { + SparkSession.cleanupAnyExistingSession() + } + +} diff --git a/jvm-packages/xgboost4j-spark/src/main/scala/org/apache/spark/ml/util/XGBoostSchemaUtils.scala b/jvm-packages/xgboost4j-spark/src/main/scala/org/apache/spark/ml/util/XGBoostSchemaUtils.scala index 8765d39f3..c013cfe66 100644 --- a/jvm-packages/xgboost4j-spark/src/main/scala/org/apache/spark/ml/util/XGBoostSchemaUtils.scala +++ b/jvm-packages/xgboost4j-spark/src/main/scala/org/apache/spark/ml/util/XGBoostSchemaUtils.scala @@ -1,5 +1,5 @@ /* - Copyright (c) 2022 by Contributors + Copyright (c) 2022-2023 by Contributors Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -18,7 +18,6 @@ package org.apache.spark.ml.util import org.apache.spark.sql.types.{BooleanType, DataType, NumericType, StructType} import org.apache.spark.ml.linalg.VectorUDT -import org.apache.spark.ml.util.SchemaUtils object XGBoostSchemaUtils { diff --git a/tests/buildkite/conftest.sh b/tests/buildkite/conftest.sh index 30ef4aeab..8e315c9cd 100755 --- a/tests/buildkite/conftest.sh +++ b/tests/buildkite/conftest.sh @@ -24,7 +24,7 @@ set -x CUDA_VERSION=11.0.3 RAPIDS_VERSION=22.10 -SPARK_VERSION=3.0.1 +SPARK_VERSION=3.1.1 JDK_VERSION=8 if [[ -z ${BUILDKITE:-} ]]