sync Jun 1

2023-06-01 15:55:06 -07:00
parent c5b575e00e fa2ab1f021
commit 42867a4805
76 changed files with 1424 additions and 595 deletions
--- a/jvm-packages/.gitignore
+++ b/jvm-packages/.gitignore
@@ -1,2 +1,4 @@
 tracker.py
 build.sh
+xgboost4j-tester/pom.xml
+xgboost4j-tester/iris.csv
--- a/jvm-packages/README.md
+++ b/jvm-packages/README.md
@@ -36,6 +36,19 @@ XGBoost4J, XGBoost4J-Spark, etc. in maven repository is compiled with g++-4.8.5.
    <version>latest_version_num</version>
 </dependency>
 ```
+or, for Scala 2.13:
+```
+<dependency>
+    <groupId>ml.dmlc</groupId>
+    <artifactId>xgboost4j_2.13</artifactId>
+    <version>latest_version_num</version>
+</dependency>
+<dependency>
+    <groupId>ml.dmlc</groupId>
+    <artifactId>xgboost4j-spark_2.13</artifactId>
+    <version>latest_version_num</version>
+</dependency>
+```

 <b>sbt</b>
 ```sbt
@@ -47,7 +60,6 @@ libraryDependencies ++= Seq(

 For the latest release version number, please check [here](https://github.com/dmlc/xgboost/releases).

-To enable the GPU algorithm (`tree_method='gpu_hist'`), use artifacts `xgboost4j-gpu_2.12` and `xgboost4j-spark-gpu_2.12` instead.

 ### Access SNAPSHOT version

@@ -85,6 +97,19 @@ Then add XGBoost4J as a dependency:
    <version>latest_version_num-SNAPSHOT</version>
 </dependency>
 ```
+or, with Scala 2.13:
+```
+<dependency>
+    <groupId>ml.dmlc</groupId>
+    <artifactId>xgboost4j_2.13</artifactId>
+    <version>latest_version_num-SNAPSHOT</version>
+</dependency>
+<dependency>
+    <groupId>ml.dmlc</groupId>
+    <artifactId>xgboost4j-spark_2.13</artifactId>
+    <version>latest_version_num-SNAPSHOT</version>
+</dependency>
+```

 <b>sbt</b>
 ```sbt
@@ -96,7 +121,9 @@ libraryDependencies ++= Seq(

 For the latest release version number, please check [the repository listing](https://s3-us-west-2.amazonaws.com/xgboost-maven-repo/list.html).

+### GPU algorithm
 To enable the GPU algorithm (`tree_method='gpu_hist'`), use artifacts `xgboost4j-gpu_2.12` and `xgboost4j-spark-gpu_2.12` instead.
+Note that Scala 2.13 is not yet supported by spark-rapids (see [NVIDIA/spark-rapids#1525](https://github.com/NVIDIA/spark-rapids/issues/1525)), so the GPU algorithm can only be used with Scala 2.12.

 ## Examples

--- a/jvm-packages/pom.xml
+++ b/jvm-packages/pom.xml
@@ -5,7 +5,7 @@
    <modelVersion>4.0.0</modelVersion>

    <groupId>ml.dmlc</groupId>
-    <artifactId>xgboost-jvm_2.12</artifactId>
+    <artifactId>xgboost-jvm</artifactId>
    <version>2.0.0-SNAPSHOT</version>
    <packaging>pom</packaging>
    <name>XGBoost JVM Package</name>
@@ -33,7 +33,8 @@
        <project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
        <maven.compiler.source>1.8</maven.compiler.source>
        <maven.compiler.target>1.8</maven.compiler.target>
-        <flink.version>1.17.0</flink.version>
+        <flink.version>1.17.1</flink.version>
+        <junit.version>4.13.2</junit.version>
        <spark.version>3.4.0</spark.version>
        <spark.version.gpu>3.3.2</spark.version.gpu>
        <scala.version>2.12.17</scala.version>
@@ -45,7 +46,9 @@
        <cudf.version>23.04.0</cudf.version>
        <spark.rapids.version>23.04.1</spark.rapids.version>
        <cudf.classifier>cuda11</cudf.classifier>
-    </properties>
+        <scalatest.version>3.2.16</scalatest.version>
+        <scala-collection-compat.version>2.10.0</scala-collection-compat.version>
+      </properties>
    <repositories>
        <repository>
            <id>central_maven</id>
@@ -71,6 +74,14 @@
            </modules>
        </profile>

+        <profile>
+            <id>scala-2.13</id>
+            <properties>
+                <scala.binary.version>2.13</scala.binary.version>
+                <scala.version>2.13.10</scala.version>
+            </properties>
+        </profile>
+
        <!-- gpu profile with both cpu and gpu test suites -->
        <profile>
            <id>gpu</id>
@@ -451,7 +462,7 @@
        <plugins>
            <plugin>
                <artifactId>maven-project-info-reports-plugin</artifactId>
-                <version>3.4.3</version>
+                <version>3.4.4</version>
            </plugin>
            <plugin>
                <groupId>net.alchim31.maven</groupId>
@@ -467,6 +478,7 @@
        </plugins>
    </reporting>
    <dependencies>
+
        <dependency>
            <groupId>com.esotericsoftware</groupId>
            <artifactId>kryo</artifactId>
@@ -483,6 +495,11 @@
            <artifactId>scala-library</artifactId>
            <version>${scala.version}</version>
        </dependency>
+        <dependency>
+          <groupId>org.scala-lang.modules</groupId>
+          <artifactId>scala-collection-compat_${scala.binary.version}</artifactId>
+          <version>${scala-collection-compat.version}</version>
+        </dependency>
        <dependency>
            <groupId>commons-logging</groupId>
            <artifactId>commons-logging</artifactId>
@@ -491,13 +508,13 @@
        <dependency>
            <groupId>org.scalatest</groupId>
            <artifactId>scalatest_${scala.binary.version}</artifactId>
-            <version>3.2.16</version>
+            <version>${scalatest.version}</version>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>org.scalactic</groupId>
            <artifactId>scalactic_${scala.binary.version}</artifactId>
-            <version>3.2.15</version>
+            <version>${scalatest.version}</version>
            <scope>test</scope>
        </dependency>
    </dependencies>
--- a/jvm-packages/xgboost4j-example/pom.xml
+++ b/jvm-packages/xgboost4j-example/pom.xml
@@ -5,10 +5,11 @@
    <modelVersion>4.0.0</modelVersion>
    <parent>
        <groupId>ml.dmlc</groupId>
-        <artifactId>xgboost-jvm_2.12</artifactId>
+        <artifactId>xgboost-jvm</artifactId>
        <version>2.0.0-SNAPSHOT</version>
    </parent>
-    <artifactId>xgboost4j-example_2.12</artifactId>
+    <name>xgboost4j-example</name>
+    <artifactId>xgboost4j-example_${scala.binary.version}</artifactId>
    <version>2.0.0-SNAPSHOT</version>
    <packaging>jar</packaging>
    <build>
--- a/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/flink/DistTrainWithFlink.scala
+++ b/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/flink/DistTrainWithFlink.scala
@@ -73,12 +73,13 @@ object DistTrainWithFlink {
          .map(_.f1.f0)
          .returns(testDataTypeHint)

-    val paramMap = mapAsJavaMap(Map(
-      ("eta", "0.1".asInstanceOf[AnyRef]),
-      ("max_depth", "2"),
-      ("objective", "binary:logistic"),
-      ("verbosity", "1")
-    ))
+    val paramMap = Map(
+        ("eta", "0.1".asInstanceOf[AnyRef]),
+        ("max_depth", "2"),
+        ("objective", "binary:logistic"),
+        ("verbosity", "1")
+      )
+      .asJava

    // number of iterations
    val round = 2
--- a/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/spark/SparkMLlibPipeline.scala
+++ b/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/spark/SparkMLlibPipeline.scala
@@ -20,10 +20,9 @@ import org.apache.spark.ml.{Pipeline, PipelineModel}
 import org.apache.spark.ml.evaluation.MulticlassClassificationEvaluator
 import org.apache.spark.ml.feature._
 import org.apache.spark.ml.tuning._
-import org.apache.spark.sql.SparkSession
+import org.apache.spark.sql.{DataFrame, SparkSession}
 import org.apache.spark.sql.types._
-
-import ml.dmlc.xgboost4j.scala.spark.{XGBoostClassifier, XGBoostClassificationModel}
+import ml.dmlc.xgboost4j.scala.spark.{XGBoostClassificationModel, XGBoostClassifier}

 // this example works with Iris dataset (https://archive.ics.uci.edu/ml/datasets/iris)

@@ -50,6 +49,13 @@ object SparkMLlibPipeline {
      .appName("XGBoost4J-Spark Pipeline Example")
      .getOrCreate()

+    run(spark, inputPath, nativeModelPath, pipelineModelPath, treeMethod, numWorkers)
+      .show(false)
+  }
+  private[spark] def run(spark: SparkSession, inputPath: String, nativeModelPath: String,
+                         pipelineModelPath: String, treeMethod: String,
+                         numWorkers: Int): DataFrame = {
+
    // Load dataset
    val schema = new StructType(Array(
      StructField("sepal length", DoubleType, true),
@@ -90,11 +96,11 @@ object SparkMLlibPipeline {
    val labelConverter = new IndexToString()
      .setInputCol("prediction")
      .setOutputCol("realLabel")
-      .setLabels(labelIndexer.labels)
+      .setLabels(labelIndexer.labelsArray(0))

    val pipeline = new Pipeline()
      .setStages(Array(assembler, labelIndexer, booster, labelConverter))
-    val model = pipeline.fit(training)
+    val model: PipelineModel = pipeline.fit(training)

    // Batch prediction
    val prediction = model.transform(test)
@@ -136,6 +142,6 @@ object SparkMLlibPipeline {

    // Load a saved model and serving
    val model2 = PipelineModel.load(pipelineModelPath)
-    model2.transform(test).show(false)
+    model2.transform(test)
  }
 }
--- a/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/spark/SparkTraining.scala
+++ b/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/spark/SparkTraining.scala
@@ -17,9 +17,8 @@
 package ml.dmlc.xgboost4j.scala.example.spark

 import ml.dmlc.xgboost4j.scala.spark.XGBoostClassifier
-
 import org.apache.spark.ml.feature.{StringIndexer, VectorAssembler}
-import org.apache.spark.sql.SparkSession
+import org.apache.spark.sql.{DataFrame, SparkSession}
 import org.apache.spark.sql.types.{DoubleType, StringType, StructField, StructType}

 // this example works with Iris dataset (https://archive.ics.uci.edu/ml/datasets/iris)
@@ -38,6 +37,12 @@ object SparkTraining {

    val spark = SparkSession.builder().getOrCreate()
    val inputPath = args(0)
+    val results: DataFrame = run(spark, inputPath, treeMethod, numWorkers)
+    results.show()
+  }
+
+private[spark] def run(spark: SparkSession, inputPath: String,
+                       treeMethod: String, numWorkers: Int): DataFrame =  {
    val schema = new StructType(Array(
      StructField("sepal length", DoubleType, true),
      StructField("sepal width", DoubleType, true),
@@ -81,7 +86,6 @@ object SparkTraining {
      setFeaturesCol("features").
      setLabelCol("classIndex")
    val xgbClassificationModel = xgbClassifier.fit(train)
-    val results = xgbClassificationModel.transform(test)
-    results.show()
+    xgbClassificationModel.transform(test)
  }
 }
--- a/jvm-packages/xgboost4j-example/src/test/scala/ml/dmlc/xgboost4j/scala/example/spark/SparkExamplesTest.scala
+++ b/jvm-packages/xgboost4j-example/src/test/scala/ml/dmlc/xgboost4j/scala/example/spark/SparkExamplesTest.scala
@@ -0,0 +1,123 @@
+/*
+ Copyright (c) 2014-2023 by Contributors
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ */
+package ml.dmlc.xgboost4j.scala.example.spark
+
+import org.apache.spark.sql.SparkSession
+import org.scalatest.BeforeAndAfterAll
+import org.scalatest.funsuite.AnyFunSuite
+import org.slf4j.LoggerFactory
+
+import java.io.File
+import java.nio.file.{Files, StandardOpenOption}
+import scala.jdk.CollectionConverters._
+import scala.util.{Random, Try}
+
+/**
+ * Smoke tests that run the Spark example programs end to end on a local SparkSession,
+ * using a small iris-like CSV file generated with a fixed random seed.
+ */
+class SparkExamplesTest extends AnyFunSuite with BeforeAndAfterAll {
+  private val logger = LoggerFactory.getLogger(classOf[SparkExamplesTest])
+  private val random = new Random(42)
+  protected val numWorkers: Int = scala.math.min(Runtime.getRuntime.availableProcessors(), 4)
+
+  private val pathToTestDataset = Files.createTempFile("", "iris.csv").toAbsolutePath
+  private var spark: SparkSession = _
+
+  /** Starts the shared local SparkSession (once) and writes the 150-row dataset to disk. */
+  override def beforeAll(): Unit = {
+    // Builds one CSV row: four feature values in [0.0, 9.8] followed by a class name.
+    def generateLine(i: Int): String = {
+      val getIrisName = (int: Int) => {
+        // floorMod keeps the index in 0..2 even if the Int sum below overflows to a negative
+        // value; a plain `%` would then yield -1 or -2 and throw a MatchError.
+        Math.floorMod(int, 3) match {
+          case 0 => "Iris-versicolor"
+          case 1 => "Iris-virginica"
+          case 2 => "Iris-setosa"
+        }
+      }
+      val generateValue = () => Math.abs(random.nextInt(99) * 0.1)
+      val sepalLength = generateValue()
+      val sepalWidth = generateValue()
+      val petalLength = generateValue()
+      val petalWidth = generateValue()
+      val irisName = getIrisName(Math.abs(random.nextInt()) + i)
+      s"$sepalLength,$sepalWidth,$petalLength,$petalWidth,$irisName"
+    }
+
+    if (spark == null) {
+       spark = SparkSession
+        .builder()
+        .appName("XGBoost4J-Spark Pipeline Example")
+        .master(s"local[${numWorkers}]")
+        .config("spark.ui.enabled", value = false)
+        .config("spark.driver.memory", "512m")
+        .config("spark.barrier.sync.timeout", 10)
+        .config("spark.task.cpus", 1)
+        .getOrCreate()
+      spark.sparkContext.setLogLevel("ERROR")
+    }
+    val data = (0 until 150)
+      .map(i => generateLine(i))
+      .toList
+      .asJava
+    Files.write(pathToTestDataset,
+      data,
+      StandardOpenOption.CREATE,
+      StandardOpenOption.WRITE,
+      StandardOpenOption.TRUNCATE_EXISTING)
+    logger.info(s"${new String(Files.readAllBytes(pathToTestDataset))}")
+  }
+
+  /** Stops the shared session, removes cache files left behind and deletes the dataset. */
+  override def afterAll(): Unit = {
+    if (spark != null) {
+      // Read the name before stopping so no stopped context is touched afterwards.
+      val appName = spark.sparkContext.appName
+      spark.stop()
+      cleanExternalCache(appName)
+      spark = null
+    }
+
+    Try(Files.deleteIfExists(pathToTestDataset))
+      .recover {
+        case e =>
+          logger.warn(
+            s"Could not delete temporary file $pathToTestDataset. Please, remove it manually",
+            e
+          )
+          true
+    }
+  }
+
+  /** Deletes entries of the working directory whose name starts with `prefix`. */
+  private def cleanExternalCache(prefix: String): Unit = {
+    // File.listFiles returns null on an I/O error; treat that as "nothing to delete".
+    val entries = Option(new File(".").listFiles()).getOrElse(Array.empty[File])
+    entries.filter(_.getName.startsWith(prefix)).foreach(_.delete())
+  }
+
+  test("Smoke test for SparkMLlibPipeline example") {
+    SparkMLlibPipeline.run(spark, pathToTestDataset.toString, "target/native-model",
+      "target/pipeline-model", "auto", 2)
+  }
+
+  test("Smoke test for SparkTraining example") {
+    // Reuse the session from beforeAll instead of repeating the builder configuration.
+    SparkTraining.run(spark, pathToTestDataset.toString, "auto", 2)
+  }
+}
--- a/jvm-packages/xgboost4j-flink/pom.xml
+++ b/jvm-packages/xgboost4j-flink/pom.xml
@@ -5,9 +5,11 @@
    <modelVersion>4.0.0</modelVersion>
    <parent>
        <groupId>ml.dmlc</groupId>
-        <artifactId>xgboost-jvm_2.12</artifactId>
+        <artifactId>xgboost-jvm</artifactId>
        <version>2.0.0-SNAPSHOT</version>
    </parent>
+
+    <name>xgboost4j-flink</name>
    <artifactId>xgboost4j-flink_${scala.binary.version}</artifactId>
    <version>2.0.0-SNAPSHOT</version>
    <properties>
--- a/jvm-packages/xgboost4j-gpu/pom.xml
+++ b/jvm-packages/xgboost4j-gpu/pom.xml
@@ -5,10 +5,11 @@
    <modelVersion>4.0.0</modelVersion>
    <parent>
        <groupId>ml.dmlc</groupId>
-        <artifactId>xgboost-jvm_2.12</artifactId>
+        <artifactId>xgboost-jvm</artifactId>
        <version>2.0.0-SNAPSHOT</version>
    </parent>
-    <artifactId>xgboost4j-gpu_2.12</artifactId>
+    <artifactId>xgboost4j-gpu_${scala.binary.version}</artifactId>
+    <name>xgboost4j-gpu</name>
    <version>2.0.0-SNAPSHOT</version>
    <packaging>jar</packaging>

@@ -35,13 +36,13 @@
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
-            <version>4.13.2</version>
+            <version>${junit.version}</version>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>org.scalatest</groupId>
            <artifactId>scalatest_${scala.binary.version}</artifactId>
-            <version>3.2.15</version>
+            <version>${scalatest.version}</version>
            <scope>provided</scope>
        </dependency>
        <dependency>
--- a/jvm-packages/xgboost4j-spark-gpu/pom.xml
+++ b/jvm-packages/xgboost4j-spark-gpu/pom.xml
@@ -5,10 +5,11 @@
    <modelVersion>4.0.0</modelVersion>
    <parent>
        <groupId>ml.dmlc</groupId>
-        <artifactId>xgboost-jvm_2.12</artifactId>
+        <artifactId>xgboost-jvm</artifactId>
        <version>2.0.0-SNAPSHOT</version>
    </parent>
-    <artifactId>xgboost4j-spark-gpu_2.12</artifactId>
+    <name>xgboost4j-spark-gpu</name>
+    <artifactId>xgboost4j-spark-gpu_${scala.binary.version}</artifactId>
    <build>
        <plugins>
            <plugin>
@@ -24,7 +25,7 @@
        <dependency>
            <groupId>ml.dmlc</groupId>
            <artifactId>xgboost4j-gpu_${scala.binary.version}</artifactId>
-            <version>2.0.0-SNAPSHOT</version>
+            <version>${project.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
--- a/jvm-packages/xgboost4j-spark/pom.xml
+++ b/jvm-packages/xgboost4j-spark/pom.xml
@@ -5,10 +5,11 @@
    <modelVersion>4.0.0</modelVersion>
    <parent>
        <groupId>ml.dmlc</groupId>
-        <artifactId>xgboost-jvm_2.12</artifactId>
+        <artifactId>xgboost-jvm</artifactId>
        <version>2.0.0-SNAPSHOT</version>
    </parent>
-    <artifactId>xgboost4j-spark_2.12</artifactId>
+    <name>xgboost4j-spark</name>
+    <artifactId>xgboost4j-spark_${scala.binary.version}</artifactId>
    <build>
        <plugins>
            <plugin>
@@ -24,7 +25,7 @@
        <dependency>
            <groupId>ml.dmlc</groupId>
            <artifactId>xgboost4j_${scala.binary.version}</artifactId>
-            <version>2.0.0-SNAPSHOT</version>
+            <version>${project.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
--- a/jvm-packages/xgboost4j-tester/generate_pom.py
+++ b/jvm-packages/xgboost4j-tester/generate_pom.py
@@ -8,25 +8,28 @@ pom_template = """
  <modelVersion>4.0.0</modelVersion>

  <groupId>ml.dmlc</groupId>
-  <artifactId>xgboost4j-tester_2.12</artifactId>
+  <artifactId>xgboost4j-tester_{scala_binary_version}</artifactId>
  <version>1.0-SNAPSHOT</version>

-  <name>xgboost4j-tester_2.12</name>
+  <name>xgboost4j-tester</name>

  <properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    <maven.compiler.source>{maven_compiler_source}</maven.compiler.source>
    <maven.compiler.target>{maven_compiler_target}</maven.compiler.target>
+    <junit.version>4.13.2</junit.version>
    <spark.version>{spark_version}</spark.version>
    <scala.version>{scala_version}</scala.version>
+    <scalatest.version>3.2.15</scalatest.version>
    <scala.binary.version>{scala_binary_version}</scala.binary.version>
+    <kryo.version>5.5.0</kryo.version>
  </properties>

  <dependencies>
-    <dependency>
+   <dependency>
      <groupId>com.esotericsoftware</groupId>
      <artifactId>kryo</artifactId>
-      <version>4.0.2</version>
+      <version>${{kryo.version}}</version>
    </dependency>
    <dependency>
      <groupId>org.scala-lang</groupId>
@@ -48,29 +51,12 @@ pom_template = """
      <artifactId>commons-logging</artifactId>
      <version>1.2</version>
    </dependency>
-    <dependency>
-      <groupId>com.typesafe.akka</groupId>
-      <artifactId>akka-testkit_${{scala.binary.version}}</artifactId>
-      <version>2.6.20</version>
-      <scope>test</scope>
-    </dependency>
    <dependency>
      <groupId>org.scalatest</groupId>
      <artifactId>scalatest_${{scala.binary.version}}</artifactId>
-      <version>3.0.8</version>
+      <version>${{scalatest.version}}</version>
      <scope>test</scope>
    </dependency>
-    <dependency>
-      <groupId>org.scalactic</groupId>
-      <artifactId>scalactic_${{scala.binary.version}}</artifactId>
-      <version>3.2.15</version>
-      <scope>test</scope>
-    </dependency>
-    <dependency>
-      <groupId>org.apache.commons</groupId>
-      <artifactId>commons-lang3</artifactId>
-      <version>3.9</version>
-    </dependency>
    <dependency>
      <groupId>org.apache.spark</groupId>
      <artifactId>spark-core_${{scala.binary.version}}</artifactId>
@@ -92,7 +78,7 @@ pom_template = """
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
-      <version>4.13.2</version>
+      <version>${{junit.version}}</version>
      <scope>test</scope>
    </dependency>
    <dependency>
@@ -122,36 +108,9 @@ pom_template = """

  <build>
    <plugins>
-      <!-- clean lifecycle, see https://maven.apache.org/ref/current/maven-core/lifecycles.html#clean_Lifecycle -->
-      <plugin>
-        <artifactId>maven-clean-plugin</artifactId>
-        <version>3.1.0</version>
-      </plugin>
-      <!-- default lifecycle, jar packaging: see https://maven.apache.org/ref/current/maven-core/default-bindings.html#Plugin_bindings_for_jar_packaging -->
-      <plugin>
-        <artifactId>maven-resources-plugin</artifactId>
-        <version>3.0.2</version>
-      </plugin>
-      <plugin>
-        <artifactId>maven-compiler-plugin</artifactId>
-        <version>3.8.0</version>
-      </plugin>
-      <plugin>
-        <artifactId>maven-jar-plugin</artifactId>
-        <version>3.0.2</version>
-      </plugin>
-      <plugin>
-        <artifactId>maven-install-plugin</artifactId>
-        <version>2.5.2</version>
-      </plugin>
-      <plugin>
-        <artifactId>maven-deploy-plugin</artifactId>
-        <version>2.8.2</version>
-      </plugin>
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-assembly-plugin</artifactId>
-        <version>2.4</version>
        <configuration>
          <descriptorRefs>
            <descriptorRef>jar-with-dependencies</descriptorRef>
@@ -171,22 +130,12 @@ pom_template = """
          </execution>
        </executions>
      </plugin>
-      <!-- site lifecycle, see https://maven.apache.org/ref/current/maven-core/lifecycles.html#site_Lifecycle -->
-      <plugin>
-        <artifactId>maven-site-plugin</artifactId>
-        <version>3.7.1</version>
-      </plugin>
-      <plugin>
-        <artifactId>maven-project-info-reports-plugin</artifactId>
-        <version>3.0.0</version>
-      </plugin>
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-surefire-plugin</artifactId>
-        <version>2.22.1</version>
        <configuration>
          <dependenciesToScan>
-            <dependency>ml.dmlc:xgboost4j_2.12</dependency>
+            <dependency>ml.dmlc:xgboost4j_${{scala.binary.version}}</dependency>
          </dependenciesToScan>
        </configuration>
      </plugin>
--- a/jvm-packages/xgboost4j-tester/src/test/java/ml/dmlc/xgboost4j/tester/AppTest.java
+++ b/jvm-packages/xgboost4j-tester/src/test/java/ml/dmlc/xgboost4j/tester/AppTest.java
@@ -1,20 +0,0 @@
-package ml.dmlc.xgboost4j.tester;
-
-import static org.junit.Assert.assertTrue;
-
-import org.junit.Test;
-
-/**
- * Unit test for simple App.
- */
-public class AppTest 
-{
-    /**
-     * Rigorous Test :-)
-     */
-    @Test
-    public void shouldAnswerWithTrue()
-    {
-        assertTrue( true );
-    }
-}
--- a/jvm-packages/xgboost4j/pom.xml
+++ b/jvm-packages/xgboost4j/pom.xml
@@ -5,10 +5,11 @@
    <modelVersion>4.0.0</modelVersion>
    <parent>
        <groupId>ml.dmlc</groupId>
-        <artifactId>xgboost-jvm_2.12</artifactId>
+        <artifactId>xgboost-jvm</artifactId>
        <version>2.0.0-SNAPSHOT</version>
    </parent>
-    <artifactId>xgboost4j_2.12</artifactId>
+    <name>xgboost4j</name>
+    <artifactId>xgboost4j_${scala.binary.version}</artifactId>
    <version>2.0.0-SNAPSHOT</version>
    <packaging>jar</packaging>

@@ -28,13 +29,13 @@
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
-            <version>4.13.2</version>
+            <version>${junit.version}</version>
            <scope>test</scope>
        </dependency>
        <dependency>
          <groupId>org.scalatest</groupId>
          <artifactId>scalatest_${scala.binary.version}</artifactId>
-          <version>3.2.16</version>
+          <version>${scalatest.version}</version>
          <scope>provided</scope>
        </dependency>
    </dependencies>
--- a/jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/scala/EvalTrait.scala
+++ b/jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/scala/EvalTrait.scala
@@ -37,7 +37,7 @@ trait EvalTrait extends IEvaluation {
   */
  def eval(predicts: Array[Array[Float]], dmat: DMatrix): Float

-  private[scala] def eval(predicts: Array[Array[Float]], jdmat: java.DMatrix): Float = {
+  def eval(predicts: Array[Array[Float]], jdmat: java.DMatrix): Float = {
    require(predicts.length == jdmat.getLabel.length, "predicts size and label size must match " +
      s" predicts size: ${predicts.length}, label size: ${jdmat.getLabel.length}")
    eval(predicts, new DMatrix(jdmat))
--- a/jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/scala/ObjectiveTrait.scala
+++ b/jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/scala/ObjectiveTrait.scala
@@ -31,7 +31,7 @@ trait ObjectiveTrait extends IObjective {
   */
  def getGradient(predicts: Array[Array[Float]], dtrain: DMatrix): List[Array[Float]]

-  private[scala] def getGradient(predicts: Array[Array[Float]], dtrain: JDMatrix):
+  def getGradient(predicts: Array[Array[Float]], dtrain: JDMatrix):
    java.util.List[Array[Float]] = {
    getGradient(predicts, new DMatrix(dtrain)).asJava
  }
--- a/jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/scala/XGBoost.scala
+++ b/jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/scala/XGBoost.scala
@@ -17,12 +17,11 @@
 package ml.dmlc.xgboost4j.scala

 import java.io.InputStream
+import ml.dmlc.xgboost4j.java.{XGBoostError, XGBoost => JXGBoost}

-import ml.dmlc.xgboost4j.java.{XGBoostError, Booster => JBooster, XGBoost => JXGBoost}
-import scala.collection.JavaConverters._
-
+import scala.jdk.CollectionConverters._
 import org.apache.hadoop.conf.Configuration
-import org.apache.hadoop.fs.{FileSystem, Path}
+import org.apache.hadoop.fs.Path

 /**
  * XGBoost Scala Training function.
@@ -40,7 +39,12 @@ object XGBoost {
      earlyStoppingRound: Int = 0,
      prevBooster: Booster,
      checkpointParams: Option[ExternalCheckpointParams]): Booster = {
-    val jWatches = watches.mapValues(_.jDMatrix).asJava
+
+    // we have to filter null value for customized obj and eval
+    val jParams: java.util.Map[String, AnyRef] =
+      params.filter(_._2 != null).mapValues(_.toString.asInstanceOf[AnyRef]).toMap.asJava
+
+    val jWatches = watches.mapValues(_.jDMatrix).toMap.asJava
    val jBooster = if (prevBooster == null) {
      null
    } else {
@@ -51,8 +55,7 @@ object XGBoost {
      map(cp => {
          JXGBoost.trainAndSaveCheckpoint(
            dtrain.jDMatrix,
-            // we have to filter null value for customized obj and eval
-            params.filter(_._2 != null).mapValues(_.toString.asInstanceOf[AnyRef]).asJava,
+            jParams,
            numRounds, jWatches, metrics, obj, eval, earlyStoppingRound, jBooster,
            cp.checkpointInterval,
            cp.checkpointPath,
@@ -61,8 +64,7 @@ object XGBoost {
      getOrElse(
        JXGBoost.train(
          dtrain.jDMatrix,
-          // we have to filter null value for customized obj and eval
-          params.filter(_._2 != null).mapValues(_.toString.asInstanceOf[AnyRef]).asJava,
+          jParams,
          numRounds, jWatches, metrics, obj, eval, earlyStoppingRound, jBooster)
      )
    if (prevBooster == null) {