diff --git a/.github/workflows/jvm_tests.yml b/.github/workflows/jvm_tests.yml
index a2d8bb69a..79aac0f0b 100644
--- a/.github/workflows/jvm_tests.yml
+++ b/.github/workflows/jvm_tests.yml
@@ -75,3 +75,13 @@ jobs:
if: matrix.os == 'ubuntu-latest' # Distributed training doesn't work on Windows
env:
RABIT_MOCK: ON
+
+
+ - name: Build and Test XGBoost4J with scala 2.13
+ run: |
+ rm -rfv build/
+ cd jvm-packages
+ mvn -B clean install test -Pdefault,scala-2.13
+ if: matrix.os == 'ubuntu-latest' # Distributed training doesn't work on Windows
+ env:
+ RABIT_MOCK: ON
diff --git a/jvm-packages/.gitignore b/jvm-packages/.gitignore
index 6d3f7b7cb..e2dc7967a 100644
--- a/jvm-packages/.gitignore
+++ b/jvm-packages/.gitignore
@@ -1,2 +1,4 @@
tracker.py
build.sh
+xgboost4j-tester/pom.xml
+xgboost4j-tester/iris.csv
diff --git a/jvm-packages/README.md b/jvm-packages/README.md
index c4c8898dd..239464342 100644
--- a/jvm-packages/README.md
+++ b/jvm-packages/README.md
@@ -36,6 +36,19 @@ XGBoost4J, XGBoost4J-Spark, etc. in maven repository is compiled with g++-4.8.5.
latest_version_num
```
+or, with Scala 2.13:
+```
+
+ ml.dmlc
+ xgboost4j_2.13
+ latest_version_num
+
+
+ ml.dmlc
+ xgboost4j-spark_2.13
+ latest_version_num
+
+```
sbt
```sbt
@@ -47,7 +60,6 @@ libraryDependencies ++= Seq(
For the latest release version number, please check [here](https://github.com/dmlc/xgboost/releases).
-To enable the GPU algorithm (`tree_method='gpu_hist'`), use artifacts `xgboost4j-gpu_2.12` and `xgboost4j-spark-gpu_2.12` instead.
### Access SNAPSHOT version
@@ -85,6 +97,19 @@ Then add XGBoost4J as a dependency:
latest_version_num-SNAPSHOT
```
+or with Scala 2.13
+```
+
+ ml.dmlc
+ xgboost4j_2.13
+ latest_version_num-SNAPSHOT
+
+
+ ml.dmlc
+ xgboost4j-spark_2.13
+ latest_version_num-SNAPSHOT
+
+```
sbt
```sbt
@@ -96,7 +121,9 @@ libraryDependencies ++= Seq(
For the latest release version number, please check [the repository listing](https://s3-us-west-2.amazonaws.com/xgboost-maven-repo/list.html).
+### GPU algorithm
To enable the GPU algorithm (`tree_method='gpu_hist'`), use artifacts `xgboost4j-gpu_2.12` and `xgboost4j-spark-gpu_2.12` instead.
+Note that Scala 2.13 is not yet supported by spark-rapids (see [NVIDIA/spark-rapids#1525](https://github.com/NVIDIA/spark-rapids/issues/1525)), so the GPU algorithm can only be used with Scala 2.12.
## Examples
diff --git a/jvm-packages/pom.xml b/jvm-packages/pom.xml
index c80bf8bc6..fa0cb5344 100644
--- a/jvm-packages/pom.xml
+++ b/jvm-packages/pom.xml
@@ -5,7 +5,7 @@
4.0.0
ml.dmlc
- xgboost-jvm_2.12
+ xgboost-jvm
2.0.0-SNAPSHOT
pom
XGBoost JVM Package
@@ -34,6 +34,7 @@
1.8
1.8
1.17.1
+ 4.13.2
3.4.0
3.3.2
2.12.17
@@ -45,7 +46,9 @@
23.04.0
23.04.1
cuda11
-
+ 3.2.16
+ 2.9.0
+
central_maven
@@ -71,6 +74,14 @@
+
+ scala-2.13
+
+ 2.13
+ 2.13.10
+
+
+
gpu
@@ -467,6 +478,7 @@
+
com.esotericsoftware
kryo
@@ -483,6 +495,11 @@
scala-library
${scala.version}
+
+ org.scala-lang.modules
+ scala-collection-compat_${scala.binary.version}
+ ${scala-collection-compat.version}
+
commons-logging
commons-logging
@@ -491,13 +508,13 @@
org.scalatest
scalatest_${scala.binary.version}
- 3.2.16
+ ${scalatest.version}
test
org.scalactic
scalactic_${scala.binary.version}
- 3.2.15
+ ${scalatest.version}
test
diff --git a/jvm-packages/xgboost4j-example/pom.xml b/jvm-packages/xgboost4j-example/pom.xml
index 40c9c72a4..e6ed8a600 100644
--- a/jvm-packages/xgboost4j-example/pom.xml
+++ b/jvm-packages/xgboost4j-example/pom.xml
@@ -5,10 +5,11 @@
4.0.0
ml.dmlc
- xgboost-jvm_2.12
+ xgboost-jvm
2.0.0-SNAPSHOT
- xgboost4j-example_2.12
+ xgboost4j-example
+ xgboost4j-example_${scala.binary.version}
2.0.0-SNAPSHOT
jar
diff --git a/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/flink/DistTrainWithFlink.scala b/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/flink/DistTrainWithFlink.scala
index cb859f62d..3bfefb841 100644
--- a/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/flink/DistTrainWithFlink.scala
+++ b/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/flink/DistTrainWithFlink.scala
@@ -73,12 +73,13 @@ object DistTrainWithFlink {
.map(_.f1.f0)
.returns(testDataTypeHint)
- val paramMap = mapAsJavaMap(Map(
- ("eta", "0.1".asInstanceOf[AnyRef]),
- ("max_depth", "2"),
- ("objective", "binary:logistic"),
- ("verbosity", "1")
- ))
+ val paramMap = Map(
+ ("eta", "0.1".asInstanceOf[AnyRef]),
+ ("max_depth", "2"),
+ ("objective", "binary:logistic"),
+ ("verbosity", "1")
+ )
+ .asJava
// number of iterations
val round = 2
diff --git a/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/spark/SparkMLlibPipeline.scala b/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/spark/SparkMLlibPipeline.scala
index 6d676b0ae..b8da31c09 100644
--- a/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/spark/SparkMLlibPipeline.scala
+++ b/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/spark/SparkMLlibPipeline.scala
@@ -20,10 +20,9 @@ import org.apache.spark.ml.{Pipeline, PipelineModel}
import org.apache.spark.ml.evaluation.MulticlassClassificationEvaluator
import org.apache.spark.ml.feature._
import org.apache.spark.ml.tuning._
-import org.apache.spark.sql.SparkSession
+import org.apache.spark.sql.{DataFrame, SparkSession}
import org.apache.spark.sql.types._
-
-import ml.dmlc.xgboost4j.scala.spark.{XGBoostClassifier, XGBoostClassificationModel}
+import ml.dmlc.xgboost4j.scala.spark.{XGBoostClassificationModel, XGBoostClassifier}
// this example works with Iris dataset (https://archive.ics.uci.edu/ml/datasets/iris)
@@ -50,6 +49,13 @@ object SparkMLlibPipeline {
.appName("XGBoost4J-Spark Pipeline Example")
.getOrCreate()
+ run(spark, inputPath, nativeModelPath, pipelineModelPath, treeMethod, numWorkers)
+ .show(false)
+ }
+ private[spark] def run(spark: SparkSession, inputPath: String, nativeModelPath: String,
+ pipelineModelPath: String, treeMethod: String,
+ numWorkers: Int): DataFrame = {
+
// Load dataset
val schema = new StructType(Array(
StructField("sepal length", DoubleType, true),
@@ -90,11 +96,11 @@ object SparkMLlibPipeline {
val labelConverter = new IndexToString()
.setInputCol("prediction")
.setOutputCol("realLabel")
- .setLabels(labelIndexer.labels)
+ .setLabels(labelIndexer.labelsArray(0))
val pipeline = new Pipeline()
.setStages(Array(assembler, labelIndexer, booster, labelConverter))
- val model = pipeline.fit(training)
+ val model: PipelineModel = pipeline.fit(training)
// Batch prediction
val prediction = model.transform(test)
@@ -136,6 +142,6 @@ object SparkMLlibPipeline {
// Load a saved model and serving
val model2 = PipelineModel.load(pipelineModelPath)
- model2.transform(test).show(false)
+ model2.transform(test)
}
}
diff --git a/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/spark/SparkTraining.scala b/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/spark/SparkTraining.scala
index 17a32bc09..a7886f524 100644
--- a/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/spark/SparkTraining.scala
+++ b/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/spark/SparkTraining.scala
@@ -17,9 +17,8 @@
package ml.dmlc.xgboost4j.scala.example.spark
import ml.dmlc.xgboost4j.scala.spark.XGBoostClassifier
-
import org.apache.spark.ml.feature.{StringIndexer, VectorAssembler}
-import org.apache.spark.sql.SparkSession
+import org.apache.spark.sql.{DataFrame, SparkSession}
import org.apache.spark.sql.types.{DoubleType, StringType, StructField, StructType}
// this example works with Iris dataset (https://archive.ics.uci.edu/ml/datasets/iris)
@@ -38,6 +37,12 @@ object SparkTraining {
val spark = SparkSession.builder().getOrCreate()
val inputPath = args(0)
+ val results: DataFrame = run(spark, inputPath, treeMethod, numWorkers)
+ results.show()
+ }
+
+private[spark] def run(spark: SparkSession, inputPath: String,
+ treeMethod: String, numWorkers: Int): DataFrame = {
val schema = new StructType(Array(
StructField("sepal length", DoubleType, true),
StructField("sepal width", DoubleType, true),
@@ -81,7 +86,6 @@ object SparkTraining {
setFeaturesCol("features").
setLabelCol("classIndex")
val xgbClassificationModel = xgbClassifier.fit(train)
- val results = xgbClassificationModel.transform(test)
- results.show()
+ xgbClassificationModel.transform(test)
}
}
diff --git a/jvm-packages/xgboost4j-example/src/test/scala/ml/dmlc/xgboost4j/scala/example/spark/SparkExamplesTest.scala b/jvm-packages/xgboost4j-example/src/test/scala/ml/dmlc/xgboost4j/scala/example/spark/SparkExamplesTest.scala
new file mode 100644
index 000000000..f6cb700df
--- /dev/null
+++ b/jvm-packages/xgboost4j-example/src/test/scala/ml/dmlc/xgboost4j/scala/example/spark/SparkExamplesTest.scala
@@ -0,0 +1,123 @@
+/*
+ Copyright (c) 2014-2023 by Contributors
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ */
+package ml.dmlc.xgboost4j.scala.example.spark
+
+import org.apache.spark.sql.SparkSession
+import org.scalatest.BeforeAndAfterAll
+import org.scalatest.funsuite.AnyFunSuite
+import org.slf4j.LoggerFactory
+
+import java.io.File
+import java.nio.file.{Files, StandardOpenOption}
+import scala.jdk.CollectionConverters._
+import scala.util.{Random, Try}
+
+/**
+ * Smoke tests that run the Spark examples end to end on a small generated Iris-like CSV file.
+ */
+class SparkExamplesTest extends AnyFunSuite with BeforeAndAfterAll {
+  private val logger = LoggerFactory.getLogger(classOf[SparkExamplesTest])
+  private val random = new Random(42)
+  protected val numWorkers: Int = scala.math.min(Runtime.getRuntime.availableProcessors(), 4)
+
+  private val pathToTestDataset = Files.createTempFile("", "iris.csv").toAbsolutePath
+  private var spark: SparkSession = _
+
+  // Starts the shared local SparkSession and writes 150 generated rows to the temporary CSV.
+  override def beforeAll(): Unit = {
+
+    def generateLine(i: Int): String = {
+      val getIrisName = (int: Int) => {
+        // Math.abs(Int.MinValue) is negative and `+ i` may overflow, so a plain `%` could yield
+        // a negative selector and a MatchError; floorMod always returns 0, 1 or 2.
+        Math.floorMod(int, 3) match {
+          case 0 => "Iris-versicolor"
+          case 1 => "Iris-virginica"
+          case 2 => "Iris-setosa"
+        }
+      }
+      val generateValue = () => Math.abs(random.nextInt(99) * 0.1)
+      val sepalLength = generateValue()
+      val sepalWidth = generateValue()
+      val petalLength = generateValue()
+      val petalWidth = generateValue()
+      val irisName = getIrisName(Math.abs(random.nextInt()) + i)
+      s"$sepalLength,$sepalWidth,$petalLength,$petalWidth,$irisName"
+    }
+
+    if (spark == null) {
+      spark = SparkSession
+        .builder()
+        .appName("XGBoost4J-Spark Pipeline Example")
+        .master(s"local[${numWorkers}]")
+        .config("spark.ui.enabled", value = false)
+        .config("spark.driver.memory", "512m")
+        .config("spark.barrier.sync.timeout", 10)
+        .config("spark.task.cpus", 1)
+        .getOrCreate()
+      spark.sparkContext.setLogLevel("ERROR")
+    }
+    val data = (0 until 150)
+      .map(i => generateLine(i))
+      .toList
+      .asJava
+    Files.write(pathToTestDataset,
+      data,
+      StandardOpenOption.CREATE,
+      StandardOpenOption.WRITE,
+      StandardOpenOption.TRUNCATE_EXISTING)
+    logger.info(s"${new String(Files.readAllBytes(pathToTestDataset))}")
+
+  }
+
+  // Stops the shared session, removes leftover cache files and deletes the temporary CSV.
+  override def afterAll(): Unit = {
+    if (spark != null) {
+      spark.stop()
+      cleanExternalCache(spark.sparkContext.appName)
+      spark = null
+    }
+
+    Try(Files.deleteIfExists(pathToTestDataset))
+      .recover {
+        case e =>
+          logger.warn(
+            s"Could not delete temporary file $pathToTestDataset. Please, remove it manually",
+            e
+          )
+          true
+      }
+  }
+
+  // Deletes entries of the current working directory whose names start with `prefix`.
+  private def cleanExternalCache(prefix: String): Unit = {
+    // File.listFiles returns null on an I/O error; treat that as "nothing to clean".
+    val files = Option(new File(".").listFiles()).getOrElse(Array.empty[File])
+    for (file <- files if file.getName.startsWith(prefix)) {
+      file.delete()
+    }
+  }
+
+  test("Smoke test for SparkMLlibPipeline example") {
+    SparkMLlibPipeline.run(spark, pathToTestDataset.toString, "target/native-model",
+      "target/pipeline-model", "auto", 2)
+  }
+
+  test("Smoke test for SparkTraining example") {
+    // Reuses the session from beforeAll; getOrCreate() would return that same instance anyway.
+    SparkTraining.run(spark, pathToTestDataset.toString, "auto", 2)
+  }
+}
diff --git a/jvm-packages/xgboost4j-flink/pom.xml b/jvm-packages/xgboost4j-flink/pom.xml
index a9a80e29a..8d51a9dcf 100644
--- a/jvm-packages/xgboost4j-flink/pom.xml
+++ b/jvm-packages/xgboost4j-flink/pom.xml
@@ -5,9 +5,11 @@
4.0.0
ml.dmlc
- xgboost-jvm_2.12
+ xgboost-jvm
2.0.0-SNAPSHOT
+
+ xgboost4j-flink
xgboost4j-flink_${scala.binary.version}
2.0.0-SNAPSHOT
diff --git a/jvm-packages/xgboost4j-gpu/pom.xml b/jvm-packages/xgboost4j-gpu/pom.xml
index 1d7a06708..f34680302 100644
--- a/jvm-packages/xgboost4j-gpu/pom.xml
+++ b/jvm-packages/xgboost4j-gpu/pom.xml
@@ -5,10 +5,11 @@
4.0.0
ml.dmlc
- xgboost-jvm_2.12
+ xgboost-jvm
2.0.0-SNAPSHOT
- xgboost4j-gpu_2.12
+ xgboost4j-gpu_${scala.binary.version}
+ xgboost4j-gpu
2.0.0-SNAPSHOT
jar
@@ -35,13 +36,13 @@
junit
junit
- 4.13.2
+ ${junit.version}
test
org.scalatest
scalatest_${scala.binary.version}
- 3.2.15
+ ${scalatest.version}
provided
diff --git a/jvm-packages/xgboost4j-spark-gpu/pom.xml b/jvm-packages/xgboost4j-spark-gpu/pom.xml
index 57770be5a..b7be69e69 100644
--- a/jvm-packages/xgboost4j-spark-gpu/pom.xml
+++ b/jvm-packages/xgboost4j-spark-gpu/pom.xml
@@ -5,10 +5,11 @@
4.0.0
ml.dmlc
- xgboost-jvm_2.12
+ xgboost-jvm
2.0.0-SNAPSHOT
- xgboost4j-spark-gpu_2.12
+ xgboost4j-spark-gpu
+ xgboost4j-spark-gpu_${scala.binary.version}
@@ -24,7 +25,7 @@
ml.dmlc
xgboost4j-gpu_${scala.binary.version}
- 2.0.0-SNAPSHOT
+ ${project.version}
org.apache.spark
diff --git a/jvm-packages/xgboost4j-spark/pom.xml b/jvm-packages/xgboost4j-spark/pom.xml
index 3a84233d1..d8f4cb914 100644
--- a/jvm-packages/xgboost4j-spark/pom.xml
+++ b/jvm-packages/xgboost4j-spark/pom.xml
@@ -5,10 +5,11 @@
4.0.0
ml.dmlc
- xgboost-jvm_2.12
+ xgboost-jvm
2.0.0-SNAPSHOT
- xgboost4j-spark_2.12
+ xgboost4j-spark
+ xgboost4j-spark_${scala.binary.version}
@@ -24,7 +25,7 @@
ml.dmlc
xgboost4j_${scala.binary.version}
- 2.0.0-SNAPSHOT
+ ${project.version}
org.apache.spark
diff --git a/jvm-packages/xgboost4j-tester/generate_pom.py b/jvm-packages/xgboost4j-tester/generate_pom.py
index cc391dd00..b9c274c28 100644
--- a/jvm-packages/xgboost4j-tester/generate_pom.py
+++ b/jvm-packages/xgboost4j-tester/generate_pom.py
@@ -8,25 +8,28 @@ pom_template = """
4.0.0
ml.dmlc
- xgboost4j-tester_2.12
+ xgboost4j-tester_{scala_binary_version}
1.0-SNAPSHOT
- xgboost4j-tester_2.12
+ xgboost4j-tester
UTF-8
{maven_compiler_source}
{maven_compiler_target}
+ 4.13.2
{spark_version}
{scala_version}
+ 3.2.15
{scala_binary_version}
+ 5.5.0
-
+
com.esotericsoftware
kryo
- 4.0.2
+ ${{kryo.version}}
org.scala-lang
@@ -48,29 +51,12 @@ pom_template = """
commons-logging
1.2
-
- com.typesafe.akka
- akka-testkit_${{scala.binary.version}}
- 2.6.20
- test
-
org.scalatest
scalatest_${{scala.binary.version}}
- 3.0.8
+ ${{scalatest.version}}
test
-
- org.scalactic
- scalactic_${{scala.binary.version}}
- 3.2.15
- test
-
-
- org.apache.commons
- commons-lang3
- 3.9
-
org.apache.spark
spark-core_${{scala.binary.version}}
@@ -92,7 +78,7 @@ pom_template = """
junit
junit
- 4.13.2
+ ${{junit.version}}
test
@@ -122,36 +108,9 @@ pom_template = """
-
-
- maven-clean-plugin
- 3.1.0
-
-
-
- maven-resources-plugin
- 3.0.2
-
-
- maven-compiler-plugin
- 3.8.0
-
-
- maven-jar-plugin
- 3.0.2
-
-
- maven-install-plugin
- 2.5.2
-
-
- maven-deploy-plugin
- 2.8.2
-
org.apache.maven.plugins
maven-assembly-plugin
- 2.4
jar-with-dependencies
@@ -171,22 +130,12 @@ pom_template = """
-
-
- maven-site-plugin
- 3.7.1
-
-
- maven-project-info-reports-plugin
- 3.0.0
-
org.apache.maven.plugins
maven-surefire-plugin
- 2.22.1
- ml.dmlc:xgboost4j_2.12
+ ml.dmlc:xgboost4j_${{scala.binary.version}}
diff --git a/jvm-packages/xgboost4j-tester/src/test/java/ml/dmlc/xgboost4j/tester/AppTest.java b/jvm-packages/xgboost4j-tester/src/test/java/ml/dmlc/xgboost4j/tester/AppTest.java
deleted file mode 100644
index 2df693748..000000000
--- a/jvm-packages/xgboost4j-tester/src/test/java/ml/dmlc/xgboost4j/tester/AppTest.java
+++ /dev/null
@@ -1,20 +0,0 @@
-package ml.dmlc.xgboost4j.tester;
-
-import static org.junit.Assert.assertTrue;
-
-import org.junit.Test;
-
-/**
- * Unit test for simple App.
- */
-public class AppTest
-{
- /**
- * Rigorous Test :-)
- */
- @Test
- public void shouldAnswerWithTrue()
- {
- assertTrue( true );
- }
-}
diff --git a/jvm-packages/xgboost4j/pom.xml b/jvm-packages/xgboost4j/pom.xml
index 17ed66a12..4352aab12 100644
--- a/jvm-packages/xgboost4j/pom.xml
+++ b/jvm-packages/xgboost4j/pom.xml
@@ -5,10 +5,11 @@
4.0.0
ml.dmlc
- xgboost-jvm_2.12
+ xgboost-jvm
2.0.0-SNAPSHOT
- xgboost4j_2.12
+ xgboost4j
+ xgboost4j_${scala.binary.version}
2.0.0-SNAPSHOT
jar
@@ -28,13 +29,13 @@
junit
junit
- 4.13.2
+ ${junit.version}
test
org.scalatest
scalatest_${scala.binary.version}
- 3.2.16
+ ${scalatest.version}
provided
diff --git a/jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/scala/EvalTrait.scala b/jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/scala/EvalTrait.scala
index 587ace352..fe17804fd 100644
--- a/jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/scala/EvalTrait.scala
+++ b/jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/scala/EvalTrait.scala
@@ -37,7 +37,7 @@ trait EvalTrait extends IEvaluation {
*/
def eval(predicts: Array[Array[Float]], dmat: DMatrix): Float
- private[scala] def eval(predicts: Array[Array[Float]], jdmat: java.DMatrix): Float = {
+ def eval(predicts: Array[Array[Float]], jdmat: java.DMatrix): Float = {
require(predicts.length == jdmat.getLabel.length, "predicts size and label size must match " +
s" predicts size: ${predicts.length}, label size: ${jdmat.getLabel.length}")
eval(predicts, new DMatrix(jdmat))
diff --git a/jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/scala/ObjectiveTrait.scala b/jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/scala/ObjectiveTrait.scala
index 24e603762..de218f0c5 100644
--- a/jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/scala/ObjectiveTrait.scala
+++ b/jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/scala/ObjectiveTrait.scala
@@ -31,7 +31,7 @@ trait ObjectiveTrait extends IObjective {
*/
def getGradient(predicts: Array[Array[Float]], dtrain: DMatrix): List[Array[Float]]
- private[scala] def getGradient(predicts: Array[Array[Float]], dtrain: JDMatrix):
+ def getGradient(predicts: Array[Array[Float]], dtrain: JDMatrix):
java.util.List[Array[Float]] = {
getGradient(predicts, new DMatrix(dtrain)).asJava
}
diff --git a/jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/scala/XGBoost.scala b/jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/scala/XGBoost.scala
index 90d06c343..50d86c893 100644
--- a/jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/scala/XGBoost.scala
+++ b/jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/scala/XGBoost.scala
@@ -17,12 +17,11 @@
package ml.dmlc.xgboost4j.scala
import java.io.InputStream
+import ml.dmlc.xgboost4j.java.{XGBoostError, XGBoost => JXGBoost}
-import ml.dmlc.xgboost4j.java.{XGBoostError, Booster => JBooster, XGBoost => JXGBoost}
-import scala.collection.JavaConverters._
-
+import scala.jdk.CollectionConverters._
import org.apache.hadoop.conf.Configuration
-import org.apache.hadoop.fs.{FileSystem, Path}
+import org.apache.hadoop.fs.Path
/**
* XGBoost Scala Training function.
@@ -40,7 +39,12 @@ object XGBoost {
earlyStoppingRound: Int = 0,
prevBooster: Booster,
checkpointParams: Option[ExternalCheckpointParams]): Booster = {
- val jWatches = watches.mapValues(_.jDMatrix).asJava
+
+ // we have to filter null value for customized obj and eval
+ val jParams: java.util.Map[String, AnyRef] =
+ params.filter(_._2 != null).mapValues(_.toString.asInstanceOf[AnyRef]).toMap.asJava
+
+ val jWatches = watches.mapValues(_.jDMatrix).toMap.asJava
val jBooster = if (prevBooster == null) {
null
} else {
@@ -51,8 +55,7 @@ object XGBoost {
map(cp => {
JXGBoost.trainAndSaveCheckpoint(
dtrain.jDMatrix,
- // we have to filter null value for customized obj and eval
- params.filter(_._2 != null).mapValues(_.toString.asInstanceOf[AnyRef]).asJava,
+ jParams,
numRounds, jWatches, metrics, obj, eval, earlyStoppingRound, jBooster,
cp.checkpointInterval,
cp.checkpointPath,
@@ -61,8 +64,7 @@ object XGBoost {
getOrElse(
JXGBoost.train(
dtrain.jDMatrix,
- // we have to filter null value for customized obj and eval
- params.filter(_._2 != null).mapValues(_.toString.asInstanceOf[AnyRef]).asJava,
+ jParams,
numRounds, jWatches, metrics, obj, eval, earlyStoppingRound, jBooster)
)
if (prevBooster == null) {
diff --git a/tests/buildkite/build-jvm-packages.sh b/tests/buildkite/build-jvm-packages.sh
index 1de43bbd0..33cfffe71 100755
--- a/tests/buildkite/build-jvm-packages.sh
+++ b/tests/buildkite/build-jvm-packages.sh
@@ -4,11 +4,18 @@ set -euo pipefail
source tests/buildkite/conftest.sh
-echo "--- Build XGBoost JVM packages"
+echo "--- Build XGBoost JVM packages scala 2.12"
tests/ci_build/ci_build.sh jvm docker tests/ci_build/build_jvm_packages.sh \
${SPARK_VERSION}
+
+echo "--- Build XGBoost JVM packages scala 2.13"
+
+tests/ci_build/ci_build.sh jvm docker tests/ci_build/build_jvm_packages.sh \
+ ${SPARK_VERSION} "" "" "true"
+
echo "--- Stash XGBoost4J JARs"
buildkite-agent artifact upload "jvm-packages/xgboost4j/target/*.jar"
buildkite-agent artifact upload "jvm-packages/xgboost4j-spark/target/*.jar"
+buildkite-agent artifact upload "jvm-packages/xgboost4j-flink/target/*.jar"
buildkite-agent artifact upload "jvm-packages/xgboost4j-example/target/*.jar"
diff --git a/tests/buildkite/conftest.sh b/tests/buildkite/conftest.sh
index cf9270c11..957dd443c 100755
--- a/tests/buildkite/conftest.sh
+++ b/tests/buildkite/conftest.sh
@@ -25,7 +25,7 @@ set -x
CUDA_VERSION=11.8.0
NCCL_VERSION=2.16.5-1
RAPIDS_VERSION=23.02
-SPARK_VERSION=3.1.1
+SPARK_VERSION=3.4.0
JDK_VERSION=8
if [[ -z ${BUILDKITE:-} ]]
diff --git a/tests/ci_build/Dockerfile.jvm_cross b/tests/ci_build/Dockerfile.jvm_cross
index 6d9c5c57f..fdfae310a 100644
--- a/tests/ci_build/Dockerfile.jvm_cross
+++ b/tests/ci_build/Dockerfile.jvm_cross
@@ -20,10 +20,14 @@ RUN \
wget -nv https://archive.apache.org/dist/maven/maven-3/3.6.1/binaries/apache-maven-3.6.1-bin.tar.gz && \
tar xvf apache-maven-3.6.1-bin.tar.gz -C /opt && \
ln -s /opt/apache-maven-3.6.1/ /opt/maven && \
- # Spark
- wget -nv https://archive.apache.org/dist/spark/spark-$SPARK_VERSION/spark-$SPARK_VERSION-bin-hadoop2.7.tgz && \
- tar xvf spark-$SPARK_VERSION-bin-hadoop2.7.tgz -C /opt && \
- ln -s /opt/spark-$SPARK_VERSION-bin-hadoop2.7 /opt/spark
+ # Spark with scala 2.12
+ mkdir -p /opt/spark-scala-2.12 && \
+ wget -nv https://archive.apache.org/dist/spark/spark-$SPARK_VERSION/spark-$SPARK_VERSION-bin-hadoop3.tgz && \
+ tar xvf spark-$SPARK_VERSION-bin-hadoop3.tgz --strip-components=1 -C /opt/spark-scala-2.12 && \
+ # Spark with scala 2.13
+ mkdir -p /opt/spark-scala-2.13 && \
+ wget -nv https://archive.apache.org/dist/spark/spark-$SPARK_VERSION/spark-$SPARK_VERSION-bin-hadoop3-scala2.13.tgz && \
+ tar xvf spark-$SPARK_VERSION-bin-hadoop3-scala2.13.tgz --strip-components=1 -C /opt/spark-scala-2.13
ENV PATH=/opt/mambaforge/bin:/opt/spark/bin:/opt/maven/bin:$PATH
diff --git a/tests/ci_build/build_jvm_packages.sh b/tests/ci_build/build_jvm_packages.sh
index 241fc445f..5797a1f61 100755
--- a/tests/ci_build/build_jvm_packages.sh
+++ b/tests/ci_build/build_jvm_packages.sh
@@ -6,6 +6,7 @@ set -x
spark_version=$1
use_cuda=$2
gpu_arch=$3
+use_scala213=$4
gpu_options=""
if [ "x$use_cuda" == "x-Duse.cuda=ON" ]; then
@@ -22,7 +23,13 @@ export RABIT_MOCK=ON
if [ "x$gpu_arch" != "x" ]; then
export GPU_ARCH_FLAG=$gpu_arch
fi
-mvn --no-transfer-progress package -Dspark.version=${spark_version} $gpu_options
+
+mvn_profile_string=""
+if [ "x$use_scala213" != "x" ]; then
+ export mvn_profile_string="-Pdefault,scala-2.13"
+fi
+
+mvn --no-transfer-progress package $mvn_profile_string -Dspark.version=${spark_version} $gpu_options
set +x
set +e
diff --git a/tests/ci_build/test_jvm_cross.sh b/tests/ci_build/test_jvm_cross.sh
index 378846d65..18265cf01 100755
--- a/tests/ci_build/test_jvm_cross.sh
+++ b/tests/ci_build/test_jvm_cross.sh
@@ -6,37 +6,56 @@ set -x
# Initialize local Maven repository
./tests/ci_build/initialize_maven.sh
-# Get version number of XGBoost4J and other auxiliary information
cd jvm-packages
+jvm_packages_dir=`pwd`
+# Get version number of XGBoost4J and other auxiliary information
xgboost4j_version=$(mvn help:evaluate -Dexpression=project.version -q -DforceStdout)
maven_compiler_source=$(mvn help:evaluate -Dexpression=maven.compiler.source -q -DforceStdout)
maven_compiler_target=$(mvn help:evaluate -Dexpression=maven.compiler.target -q -DforceStdout)
spark_version=$(mvn help:evaluate -Dexpression=spark.version -q -DforceStdout)
-scala_version=$(mvn help:evaluate -Dexpression=scala.version -q -DforceStdout)
-scala_binary_version=$(mvn help:evaluate -Dexpression=scala.binary.version -q -DforceStdout)
-# Install XGBoost4J JAR into local Maven repository
-mvn --no-transfer-progress install:install-file -Dfile=./xgboost4j/target/xgboost4j_${scala_binary_version}-${xgboost4j_version}.jar -DgroupId=ml.dmlc -DartifactId=xgboost4j_${scala_binary_version} -Dversion=${xgboost4j_version} -Dpackaging=jar
-mvn --no-transfer-progress install:install-file -Dfile=./xgboost4j/target/xgboost4j_${scala_binary_version}-${xgboost4j_version}-tests.jar -DgroupId=ml.dmlc -DartifactId=xgboost4j_${scala_binary_version} -Dversion=${xgboost4j_version} -Dpackaging=test-jar -Dclassifier=tests
-mvn --no-transfer-progress install:install-file -Dfile=./xgboost4j-spark/target/xgboost4j-spark_${scala_binary_version}-${xgboost4j_version}.jar -DgroupId=ml.dmlc -DartifactId=xgboost4j-spark_${scala_binary_version} -Dversion=${xgboost4j_version} -Dpackaging=jar
-mvn --no-transfer-progress install:install-file -Dfile=./xgboost4j-example/target/xgboost4j-example_${scala_binary_version}-${xgboost4j_version}.jar -DgroupId=ml.dmlc -DartifactId=xgboost4j-example_${scala_binary_version} -Dversion=${xgboost4j_version} -Dpackaging=jar
-
-cd xgboost4j-tester
-# Generate pom.xml for XGBoost4J-tester, a dummy project to run XGBoost4J tests
-python3 ./generate_pom.py ${xgboost4j_version} ${maven_compiler_source} ${maven_compiler_target} ${spark_version} ${scala_version} ${scala_binary_version}
-# Run unit tests with XGBoost4J
-mvn --no-transfer-progress package
-
-# Run integration tests with XGBoost4J
-java -jar ./target/xgboost4j-tester_${scala_binary_version}-1.0-SNAPSHOT-jar-with-dependencies.jar
-
-# Run integration tests with XGBoost4J-Spark
-if [ ! -z "$RUN_INTEGRATION_TEST" ]
-then
+if [ ! -z "$RUN_INTEGRATION_TEST" ]; then
+ cd $jvm_packages_dir/xgboost4j-tester
python3 get_iris.py
- spark-submit --class ml.dmlc.xgboost4j.scala.example.spark.SparkTraining --master 'local[8]' ./target/xgboost4j-tester_${scala_binary_version}-1.0-SNAPSHOT-jar-with-dependencies.jar ${PWD}/iris.csv
- spark-submit --class ml.dmlc.xgboost4j.scala.example.spark.SparkMLlibPipeline --master 'local[8]' ./target/xgboost4j-tester_${scala_binary_version}-1.0-SNAPSHOT-jar-with-dependencies.jar ${PWD}/iris.csv ${PWD}/native_model ${PWD}/pipeline_model
+ cd $jvm_packages_dir
fi
+# Run the tester once per Scala version: the empty profile string selects the default (2.12).
+for _maven_profile_string in "" "-Pdefault,scala-2.13"; do
+  scala_version=$(mvn help:evaluate $_maven_profile_string -Dexpression=scala.version -q -DforceStdout)
+  scala_binary_version=$(mvn help:evaluate $_maven_profile_string -Dexpression=scala.binary.version -q -DforceStdout)
+
+  # Install XGBoost4J JAR into local Maven repository
+  mvn --no-transfer-progress install:install-file -Dfile=./xgboost4j/target/xgboost4j_${scala_binary_version}-${xgboost4j_version}.jar -DgroupId=ml.dmlc -DartifactId=xgboost4j_${scala_binary_version} -Dversion=${xgboost4j_version} -Dpackaging=jar
+  mvn --no-transfer-progress install:install-file -Dfile=./xgboost4j/target/xgboost4j_${scala_binary_version}-${xgboost4j_version}-tests.jar -DgroupId=ml.dmlc -DartifactId=xgboost4j_${scala_binary_version} -Dversion=${xgboost4j_version} -Dpackaging=test-jar -Dclassifier=tests
+  mvn --no-transfer-progress install:install-file -Dfile=./xgboost4j-spark/target/xgboost4j-spark_${scala_binary_version}-${xgboost4j_version}.jar -DgroupId=ml.dmlc -DartifactId=xgboost4j-spark_${scala_binary_version} -Dversion=${xgboost4j_version} -Dpackaging=jar
+  mvn --no-transfer-progress install:install-file -Dfile=./xgboost4j-example/target/xgboost4j-example_${scala_binary_version}-${xgboost4j_version}.jar -DgroupId=ml.dmlc -DartifactId=xgboost4j-example_${scala_binary_version} -Dversion=${xgboost4j_version} -Dpackaging=jar
+
+  cd xgboost4j-tester
+  # Generate pom.xml for XGBoost4J-tester, a dummy project to run XGBoost4J tests
+  python3 ./generate_pom.py ${xgboost4j_version} ${maven_compiler_source} ${maven_compiler_target} ${spark_version} ${scala_version} ${scala_binary_version}
+  # Build package and unit tests with XGBoost4J
+  mvn --no-transfer-progress clean package
+  xgboost4j_tester_jar="$jvm_packages_dir/xgboost4j-tester/target/xgboost4j-tester_${scala_binary_version}-1.0-SNAPSHOT-jar-with-dependencies.jar"
+  # Run integration tests with XGBoost4J
+  java -jar $xgboost4j_tester_jar
+
+  # Run integration tests with XGBoost4J-Spark
+  if [ ! -z "$RUN_INTEGRATION_TEST" ]; then
+    # Changing directory so that we do not mix code and resulting files
+    cd target
+    # Each Scala binary version has its own Spark distribution under /opt/spark-scala-<version>.
+    spark_submit="/opt/spark-scala-${scala_binary_version}/bin/spark-submit"
+    if [[ "$scala_binary_version" != "2.12" && "$scala_binary_version" != "2.13" ]]; then
+      # Fail the job instead of silently skipping the Spark integration tests.
+      echo "Unexpected scala version: $scala_version ($scala_binary_version)." >&2
+      exit 1
+    fi
+    "$spark_submit" --class ml.dmlc.xgboost4j.scala.example.spark.SparkTraining --master 'local[8]' ${xgboost4j_tester_jar} $jvm_packages_dir/xgboost4j-tester/iris.csv
+    "$spark_submit" --class ml.dmlc.xgboost4j.scala.example.spark.SparkMLlibPipeline --master 'local[8]' ${xgboost4j_tester_jar} $jvm_packages_dir/xgboost4j-tester/iris.csv ${PWD}/native_model-${scala_version} ${PWD}/pipeline_model-${scala_version}
+  fi
+  cd $jvm_packages_dir
+done
+
set +x
set +e