Scala 2.13 support. (#9099)
1. Updated the test logic.
2. Added smoke tests for the Spark examples.
3. Added integration tests for Spark with Scala 2.13.
Parent: 8c174ef2d3
Commit: a01df102c9
.github/workflows/jvm_tests.yml (+10)
@@ -75,3 +75,13 @@ jobs:
       if: matrix.os == 'ubuntu-latest' # Distributed training doesn't work on Windows
       env:
         RABIT_MOCK: ON
+
+    - name: Build and Test XGBoost4J with scala 2.13
+      run: |
+        rm -rfv build/
+        cd jvm-packages
+        mvn -B clean install test -Pdefault,scala-2.13
+      if: matrix.os == 'ubuntu-latest' # Distributed training doesn't work on Windows
+      env:
+        RABIT_MOCK: ON
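The new step mirrors the existing 2.12 job but activates the `scala-2.13` Maven profile. The `rm -rfv build/` before switching profiles presumably clears the native build tree left by the preceding 2.12 step so the two builds start from a clean slate.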
jvm-packages/.gitignore (+2)
@@ -1,2 +1,4 @@
 tracker.py
 build.sh
+xgboost4j-tester/pom.xml
+xgboost4j-tester/iris.csv
jvm-packages/README.md

@@ -36,6 +36,19 @@ XGBoost4J, XGBoost4J-Spark, etc. in maven repository is compiled with g++-4.8.5.
     <version>latest_version_num</version>
 </dependency>
 ```
+or
+```
+<dependency>
+    <groupId>ml.dmlc</groupId>
+    <artifactId>xgboost4j_2.13</artifactId>
+    <version>latest_version_num</version>
+</dependency>
+<dependency>
+    <groupId>ml.dmlc</groupId>
+    <artifactId>xgboost4j-spark_2.13</artifactId>
+    <version>latest_version_num</version>
+</dependency>
+```
+
 <b>sbt</b>
 ```sbt

@@ -47,7 +60,6 @@ libraryDependencies ++= Seq(
 
 For the latest release version number, please check [here](https://github.com/dmlc/xgboost/releases).
 
-To enable the GPU algorithm (`tree_method='gpu_hist'`), use artifacts `xgboost4j-gpu_2.12` and `xgboost4j-spark-gpu_2.12` instead.
 
 ### Access SNAPSHOT version
 

@@ -85,6 +97,19 @@ Then add XGBoost4J as a dependency:
     <version>latest_version_num-SNAPSHOT</version>
 </dependency>
 ```
+or with scala 2.13
+```
+<dependency>
+    <groupId>ml.dmlc</groupId>
+    <artifactId>xgboost4j_2.13</artifactId>
+    <version>latest_version_num-SNAPSHOT</version>
+</dependency>
+<dependency>
+    <groupId>ml.dmlc</groupId>
+    <artifactId>xgboost4j-spark_2.13</artifactId>
+    <version>latest_version_num-SNAPSHOT</version>
+</dependency>
+```
+
 <b>sbt</b>
 ```sbt

@@ -96,7 +121,9 @@ libraryDependencies ++= Seq(
 
 For the latest release version number, please check [the repository listing](https://s3-us-west-2.amazonaws.com/xgboost-maven-repo/list.html).
 
+### GPU algorithm
 To enable the GPU algorithm (`tree_method='gpu_hist'`), use artifacts `xgboost4j-gpu_2.12` and `xgboost4j-spark-gpu_2.12` instead.
+Note that scala 2.13 is not yet supported by spark-rapids (see [NVIDIA/spark-rapids#1525](https://github.com/NVIDIA/spark-rapids/issues/1525)), so the GPU algorithm can only be used with scala 2.12.
 
 ## Examples
 
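For sbt users the cross-built artifacts are picked up automatically; a minimal sketch (not part of this diff, reusing the README's `latest_version_num` placeholder):

```sbt
// %% appends the project's scalaBinaryVersion, so the same line resolves
// xgboost4j_2.12 or xgboost4j_2.13 depending on scalaVersion.
libraryDependencies ++= Seq(
  "ml.dmlc" %% "xgboost4j" % "latest_version_num",
  "ml.dmlc" %% "xgboost4j-spark" % "latest_version_num"
)
```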
jvm-packages/pom.xml

@@ -5,7 +5,7 @@
     <modelVersion>4.0.0</modelVersion>

     <groupId>ml.dmlc</groupId>
-    <artifactId>xgboost-jvm_2.12</artifactId>
+    <artifactId>xgboost-jvm</artifactId>
     <version>2.0.0-SNAPSHOT</version>
     <packaging>pom</packaging>
     <name>XGBoost JVM Package</name>

@@ -34,6 +34,7 @@
         <maven.compiler.source>1.8</maven.compiler.source>
         <maven.compiler.target>1.8</maven.compiler.target>
         <flink.version>1.17.1</flink.version>
+        <junit.version>4.13.2</junit.version>
         <spark.version>3.4.0</spark.version>
         <spark.version.gpu>3.3.2</spark.version.gpu>
         <scala.version>2.12.17</scala.version>

@@ -45,7 +46,9 @@
         <cudf.version>23.04.0</cudf.version>
         <spark.rapids.version>23.04.1</spark.rapids.version>
         <cudf.classifier>cuda11</cudf.classifier>
+        <scalatest.version>3.2.16</scalatest.version>
+        <scala-collection-compat.version>2.9.0</scala-collection-compat.version>
     </properties>
     <repositories>
         <repository>
             <id>central_maven</id>

@@ -71,6 +74,14 @@
             </modules>
         </profile>
 
+        <profile>
+            <id>scala-2.13</id>
+            <properties>
+                <scala.binary.version>2.13</scala.binary.version>
+                <scala.version>2.13.10</scala.version>
+            </properties>
+        </profile>
+
         <!-- gpu profile with both cpu and gpu test suites -->
         <profile>
             <id>gpu</id>
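Activating the build with `-Pdefault,scala-2.13` overrides `scala.binary.version` and `scala.version` for the whole reactor, so every `_${scala.binary.version}` artifactId in the modules below resolves to a `_2.13` suffix; omitting the profile keeps the 2.12 defaults.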
@@ -467,6 +478,7 @@
         </plugins>
     </reporting>
     <dependencies>
+
         <dependency>
             <groupId>com.esotericsoftware</groupId>
             <artifactId>kryo</artifactId>

@@ -483,6 +495,11 @@
             <artifactId>scala-library</artifactId>
             <version>${scala.version}</version>
         </dependency>
+        <dependency>
+            <groupId>org.scala-lang.modules</groupId>
+            <artifactId>scala-collection-compat_${scala.binary.version}</artifactId>
+            <version>${scala-collection-compat.version}</version>
+        </dependency>
         <dependency>
             <groupId>commons-logging</groupId>
             <artifactId>commons-logging</artifactId>

@@ -491,13 +508,13 @@
         <dependency>
             <groupId>org.scalatest</groupId>
             <artifactId>scalatest_${scala.binary.version}</artifactId>
-            <version>3.2.16</version>
+            <version>${scalatest.version}</version>
             <scope>test</scope>
         </dependency>
         <dependency>
             <groupId>org.scalactic</groupId>
             <artifactId>scalactic_${scala.binary.version}</artifactId>
-            <version>3.2.15</version>
+            <version>${scalatest.version}</version>
             <scope>test</scope>
         </dependency>
     </dependencies>
xgboost4j-example/pom.xml

@@ -5,10 +5,11 @@
     <modelVersion>4.0.0</modelVersion>
     <parent>
         <groupId>ml.dmlc</groupId>
-        <artifactId>xgboost-jvm_2.12</artifactId>
+        <artifactId>xgboost-jvm</artifactId>
         <version>2.0.0-SNAPSHOT</version>
     </parent>
-    <artifactId>xgboost4j-example_2.12</artifactId>
+    <name>xgboost4j-example</name>
+    <artifactId>xgboost4j-example_${scala.binary.version}</artifactId>
     <version>2.0.0-SNAPSHOT</version>
     <packaging>jar</packaging>
     <build>
DistTrainWithFlink.scala

@@ -73,12 +73,13 @@ object DistTrainWithFlink {
       .map(_.f1.f0)
       .returns(testDataTypeHint)
 
-    val paramMap = mapAsJavaMap(Map(
+    val paramMap = Map(
       ("eta", "0.1".asInstanceOf[AnyRef]),
       ("max_depth", "2"),
       ("objective", "binary:logistic"),
       ("verbosity", "1")
-    ))
+    )
+      .asJava
 
     // number of iterations
     val round = 2
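The switch from `mapAsJavaMap` (part of the old implicit conversion machinery) to an explicit `.asJava` is the standard cross-building move: `scala.jdk.CollectionConverters` ships with Scala 2.13 and is backported to 2.12 by the `scala-collection-compat` dependency added in this commit. A standalone sketch, not taken from the diff:

```scala
import scala.jdk.CollectionConverters._

// Explicit .asJava compiles unchanged on 2.12 (via scala-collection-compat)
// and on 2.13, where scala.collection.JavaConversions no longer exists.
val paramMap: java.util.Map[String, AnyRef] =
  Map[String, AnyRef]("eta" -> "0.1", "max_depth" -> "2").asJava
```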
SparkMLlibPipeline.scala

@@ -20,10 +20,9 @@ import org.apache.spark.ml.{Pipeline, PipelineModel}
 import org.apache.spark.ml.evaluation.MulticlassClassificationEvaluator
 import org.apache.spark.ml.feature._
 import org.apache.spark.ml.tuning._
-import org.apache.spark.sql.SparkSession
+import org.apache.spark.sql.{DataFrame, SparkSession}
 import org.apache.spark.sql.types._
-
-import ml.dmlc.xgboost4j.scala.spark.{XGBoostClassifier, XGBoostClassificationModel}
+import ml.dmlc.xgboost4j.scala.spark.{XGBoostClassificationModel, XGBoostClassifier}
 
 // this example works with Iris dataset (https://archive.ics.uci.edu/ml/datasets/iris)
 

@@ -50,6 +49,13 @@ object SparkMLlibPipeline {
       .appName("XGBoost4J-Spark Pipeline Example")
       .getOrCreate()
 
+    run(spark, inputPath, nativeModelPath, pipelineModelPath, treeMethod, numWorkers)
+      .show(false)
+  }
+
+  private[spark] def run(spark: SparkSession, inputPath: String, nativeModelPath: String,
+                         pipelineModelPath: String, treeMethod: String,
+                         numWorkers: Int): DataFrame = {
     // Load dataset
     val schema = new StructType(Array(
       StructField("sepal length", DoubleType, true),

@@ -90,11 +96,11 @@ object SparkMLlibPipeline {
     val labelConverter = new IndexToString()
       .setInputCol("prediction")
       .setOutputCol("realLabel")
-      .setLabels(labelIndexer.labels)
+      .setLabels(labelIndexer.labelsArray(0))
 
     val pipeline = new Pipeline()
       .setStages(Array(assembler, labelIndexer, booster, labelConverter))
-    val model = pipeline.fit(training)
+    val model: PipelineModel = pipeline.fit(training)
 
     // Batch prediction
     val prediction = model.transform(test)
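`StringIndexerModel.labels` has been deprecated since Spark 3.0 in favour of `labelsArray` (the model may now index several columns at once), so `labelsArray(0)` reads the labels of the single input column here; this goes with the Spark upgrade to 3.4.0 elsewhere in this commit.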
@@ -136,6 +142,6 @@ object SparkMLlibPipeline {
 
     // Load a saved model and serving
     val model2 = PipelineModel.load(pipelineModelPath)
-    model2.transform(test).show(false)
+    model2.transform(test)
   }
 }
SparkTraining.scala

@@ -17,9 +17,8 @@
 package ml.dmlc.xgboost4j.scala.example.spark
 
 import ml.dmlc.xgboost4j.scala.spark.XGBoostClassifier
-
 import org.apache.spark.ml.feature.{StringIndexer, VectorAssembler}
-import org.apache.spark.sql.SparkSession
+import org.apache.spark.sql.{DataFrame, SparkSession}
 import org.apache.spark.sql.types.{DoubleType, StringType, StructField, StructType}
 
 // this example works with Iris dataset (https://archive.ics.uci.edu/ml/datasets/iris)

@@ -38,6 +37,12 @@ object SparkTraining {
 
     val spark = SparkSession.builder().getOrCreate()
     val inputPath = args(0)
+    val results: DataFrame = run(spark, inputPath, treeMethod, numWorkers)
+    results.show()
+  }
+
+  private[spark] def run(spark: SparkSession, inputPath: String,
+                         treeMethod: String, numWorkers: Int): DataFrame = {
     val schema = new StructType(Array(
       StructField("sepal length", DoubleType, true),
       StructField("sepal width", DoubleType, true),

@@ -81,7 +86,6 @@ object SparkTraining {
       setFeaturesCol("features").
       setLabelCol("classIndex")
     val xgbClassificationModel = xgbClassifier.fit(train)
-    val results = xgbClassificationModel.transform(test)
-    results.show()
+    xgbClassificationModel.transform(test)
   }
 }
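Both examples now follow the same refactoring: `main` keeps the side effects (`show()`) and delegates the work to a `private[spark]` `run` method that returns a `DataFrame`, which is what lets the new `SparkExamplesTest` below drive them. A hypothetical test-side call (the assertion is illustrative, not part of the suite):

```scala
// run() returns the predictions instead of printing them,
// so a test can invoke the example directly and inspect the result.
val predictions = SparkTraining.run(spark, "/path/to/iris.csv", "auto", 2)
assert(predictions.count() > 0)  // hypothetical check
```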
xgboost4j-example/src/test/scala/ml/dmlc/xgboost4j/scala/example/spark/SparkExamplesTest.scala (new file)

@@ -0,0 +1,123 @@
+/*
+ Copyright (c) 2014-2023 by Contributors
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ */
+package ml.dmlc.xgboost4j.scala.example.spark
+
+import org.apache.spark.sql.SparkSession
+import org.scalatest.BeforeAndAfterAll
+import org.scalatest.funsuite.AnyFunSuite
+import org.slf4j.LoggerFactory
+
+import java.io.File
+import java.nio.file.{Files, StandardOpenOption}
+import scala.jdk.CollectionConverters._
+import scala.util.{Random, Try}
+
+class SparkExamplesTest extends AnyFunSuite with BeforeAndAfterAll {
+  private val logger = LoggerFactory.getLogger(classOf[SparkExamplesTest])
+  private val random = new Random(42)
+  protected val numWorkers: Int = scala.math.min(Runtime.getRuntime.availableProcessors(), 4)
+
+  private val pathToTestDataset = Files.createTempFile("", "iris.csv").toAbsolutePath
+  private var spark: SparkSession = _
+
+  override def beforeAll(): Unit = {
+
+    def generateLine(i: Int): String = {
+      val getIrisName = (int: Int) => {
+        int % 3 match {
+          case 0 => "Iris-versicolor"
+          case 1 => "Iris-virginica"
+          case 2 => "Iris-setosa"
+        }
+      }
+      val generateValue = () => Math.abs(random.nextInt(99) * 0.1)
+      val sepalLength = generateValue()
+      val sepalWidth = generateValue()
+      val petalLength = generateValue()
+      val petalWidth = generateValue()
+      val irisName = getIrisName(Math.abs(random.nextInt()) + i)
+      s"$sepalLength,$sepalWidth,$petalLength,$petalWidth,$irisName"
+    }
+
+    if (spark == null) {
+      spark = SparkSession
+        .builder()
+        .appName("XGBoost4J-Spark Pipeline Example")
+        .master(s"local[${numWorkers}]")
+        .config("spark.ui.enabled", value = false)
+        .config("spark.driver.memory", "512m")
+        .config("spark.barrier.sync.timeout", 10)
+        .config("spark.task.cpus", 1)
+        .getOrCreate()
+      spark.sparkContext.setLogLevel("ERROR")
+    }
+    val data = (0 until 150)
+      .map(i => generateLine(i))
+      .toList
+      .asJava
+    Files.write(pathToTestDataset,
+      data,
+      StandardOpenOption.CREATE,
+      StandardOpenOption.WRITE,
+      StandardOpenOption.TRUNCATE_EXISTING)
+    logger.info(s"${new String(Files.readAllBytes(pathToTestDataset))}")
+  }
+
+  override def afterAll(): Unit = {
+    if (spark != null) {
+      spark.stop()
+      cleanExternalCache(spark.sparkContext.appName)
+      spark = null
+    }
+
+    Try(Files.deleteIfExists(pathToTestDataset))
+      .recover {
+        case e =>
+          logger.warn(
+            s"Could not delete temporary file $pathToTestDataset. Please, remove it manually",
+            e
+          )
+          true
+      }
+  }
+
+  private def cleanExternalCache(prefix: String): Unit = {
+    val dir = new File(".")
+    for (file <- dir.listFiles() if file.getName.startsWith(prefix)) {
+      file.delete()
+    }
+  }
+
+  test("Smoke test for SparkMLlibPipeline example") {
+    SparkMLlibPipeline.run(spark, pathToTestDataset.toString, "target/native-model",
+      "target/pipeline-model", "auto", 2)
+  }
+
+  test("Smoke test for SparkTraining example") {
+    val spark = SparkSession
+      .builder()
+      .appName("XGBoost4J-Spark Pipeline Example")
+      .master(s"local[${numWorkers}]")
+      .config("spark.ui.enabled", value = false)
+      .config("spark.driver.memory", "512m")
+      .config("spark.barrier.sync.timeout", 10)
+      .config("spark.task.cpus", 1)
+      .getOrCreate()
+
+    SparkTraining.run(spark, pathToTestDataset.toString, "auto", 2)
+  }
+}
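The suite fabricates a 150-row iris-shaped CSV in a temp file instead of downloading the real dataset, so it runs as part of an ordinary `mvn test` of the examples module under either Scala profile.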
xgboost4j-flink/pom.xml

@@ -5,9 +5,11 @@
     <modelVersion>4.0.0</modelVersion>
     <parent>
         <groupId>ml.dmlc</groupId>
-        <artifactId>xgboost-jvm_2.12</artifactId>
+        <artifactId>xgboost-jvm</artifactId>
         <version>2.0.0-SNAPSHOT</version>
     </parent>
+
+    <name>xgboost4j-flink</name>
     <artifactId>xgboost4j-flink_${scala.binary.version}</artifactId>
     <version>2.0.0-SNAPSHOT</version>
     <properties>
xgboost4j-gpu/pom.xml

@@ -5,10 +5,11 @@
     <modelVersion>4.0.0</modelVersion>
     <parent>
         <groupId>ml.dmlc</groupId>
-        <artifactId>xgboost-jvm_2.12</artifactId>
+        <artifactId>xgboost-jvm</artifactId>
         <version>2.0.0-SNAPSHOT</version>
     </parent>
-    <artifactId>xgboost4j-gpu_2.12</artifactId>
+    <artifactId>xgboost4j-gpu_${scala.binary.version}</artifactId>
+    <name>xgboost4j-gpu</name>
     <version>2.0.0-SNAPSHOT</version>
     <packaging>jar</packaging>
 

@@ -35,13 +36,13 @@
         <dependency>
             <groupId>junit</groupId>
             <artifactId>junit</artifactId>
-            <version>4.13.2</version>
+            <version>${junit.version}</version>
             <scope>test</scope>
         </dependency>
         <dependency>
             <groupId>org.scalatest</groupId>
             <artifactId>scalatest_${scala.binary.version}</artifactId>
-            <version>3.2.15</version>
+            <version>${scalatest.version}</version>
             <scope>provided</scope>
         </dependency>
         <dependency>
xgboost4j-spark-gpu/pom.xml

@@ -5,10 +5,11 @@
     <modelVersion>4.0.0</modelVersion>
     <parent>
         <groupId>ml.dmlc</groupId>
-        <artifactId>xgboost-jvm_2.12</artifactId>
+        <artifactId>xgboost-jvm</artifactId>
         <version>2.0.0-SNAPSHOT</version>
     </parent>
-    <artifactId>xgboost4j-spark-gpu_2.12</artifactId>
+    <name>xgboost4j-spark-gpu</name>
+    <artifactId>xgboost4j-spark-gpu_${scala.binary.version}</artifactId>
     <build>
         <plugins>
             <plugin>

@@ -24,7 +25,7 @@
         <dependency>
             <groupId>ml.dmlc</groupId>
             <artifactId>xgboost4j-gpu_${scala.binary.version}</artifactId>
-            <version>2.0.0-SNAPSHOT</version>
+            <version>${project.version}</version>
         </dependency>
         <dependency>
             <groupId>org.apache.spark</groupId>
xgboost4j-spark/pom.xml

@@ -5,10 +5,11 @@
     <modelVersion>4.0.0</modelVersion>
     <parent>
         <groupId>ml.dmlc</groupId>
-        <artifactId>xgboost-jvm_2.12</artifactId>
+        <artifactId>xgboost-jvm</artifactId>
         <version>2.0.0-SNAPSHOT</version>
     </parent>
-    <artifactId>xgboost4j-spark_2.12</artifactId>
+    <name>xgboost4j-spark</name>
+    <artifactId>xgboost4j-spark_${scala.binary.version}</artifactId>
     <build>
         <plugins>
             <plugin>

@@ -24,7 +25,7 @@
         <dependency>
             <groupId>ml.dmlc</groupId>
             <artifactId>xgboost4j_${scala.binary.version}</artifactId>
-            <version>2.0.0-SNAPSHOT</version>
+            <version>${project.version}</version>
         </dependency>
         <dependency>
             <groupId>org.apache.spark</groupId>
xgboost4j-tester/generate_pom.py

@@ -8,25 +8,28 @@ pom_template = """
     <modelVersion>4.0.0</modelVersion>

     <groupId>ml.dmlc</groupId>
-    <artifactId>xgboost4j-tester_2.12</artifactId>
+    <artifactId>xgboost4j-tester_{scala_binary_version}</artifactId>
     <version>1.0-SNAPSHOT</version>

-    <name>xgboost4j-tester_2.12</name>
+    <name>xgboost4j-tester</name>

     <properties>
         <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
         <maven.compiler.source>{maven_compiler_source}</maven.compiler.source>
         <maven.compiler.target>{maven_compiler_target}</maven.compiler.target>
+        <junit.version>4.13.2</junit.version>
         <spark.version>{spark_version}</spark.version>
         <scala.version>{scala_version}</scala.version>
+        <scalatest.version>3.2.15</scalatest.version>
         <scala.binary.version>{scala_binary_version}</scala.binary.version>
+        <kryo.version>5.5.0</kryo.version>
     </properties>

     <dependencies>
         <dependency>
             <groupId>com.esotericsoftware</groupId>
             <artifactId>kryo</artifactId>
-            <version>4.0.2</version>
+            <version>${{kryo.version}}</version>
         </dependency>
         <dependency>
             <groupId>org.scala-lang</groupId>

@@ -48,29 +51,12 @@ pom_template = """
             <artifactId>commons-logging</artifactId>
             <version>1.2</version>
         </dependency>
-        <dependency>
-            <groupId>com.typesafe.akka</groupId>
-            <artifactId>akka-testkit_${{scala.binary.version}}</artifactId>
-            <version>2.6.20</version>
-            <scope>test</scope>
-        </dependency>
         <dependency>
             <groupId>org.scalatest</groupId>
             <artifactId>scalatest_${{scala.binary.version}}</artifactId>
-            <version>3.0.8</version>
+            <version>${{scalatest.version}}</version>
             <scope>test</scope>
         </dependency>
-        <dependency>
-            <groupId>org.scalactic</groupId>
-            <artifactId>scalactic_${{scala.binary.version}}</artifactId>
-            <version>3.2.15</version>
-            <scope>test</scope>
-        </dependency>
-        <dependency>
-            <groupId>org.apache.commons</groupId>
-            <artifactId>commons-lang3</artifactId>
-            <version>3.9</version>
-        </dependency>
         <dependency>
             <groupId>org.apache.spark</groupId>
             <artifactId>spark-core_${{scala.binary.version}}</artifactId>

@@ -92,7 +78,7 @@ pom_template = """
         <dependency>
             <groupId>junit</groupId>
             <artifactId>junit</artifactId>
-            <version>4.13.2</version>
+            <version>${{junit.version}}</version>
             <scope>test</scope>
         </dependency>
         <dependency>

@@ -122,36 +108,9 @@ pom_template = """

     <build>
         <plugins>
-            <!-- clean lifecycle, see https://maven.apache.org/ref/current/maven-core/lifecycles.html#clean_Lifecycle -->
-            <plugin>
-                <artifactId>maven-clean-plugin</artifactId>
-                <version>3.1.0</version>
-            </plugin>
-            <!-- default lifecycle, jar packaging: see https://maven.apache.org/ref/current/maven-core/default-bindings.html#Plugin_bindings_for_jar_packaging -->
-            <plugin>
-                <artifactId>maven-resources-plugin</artifactId>
-                <version>3.0.2</version>
-            </plugin>
-            <plugin>
-                <artifactId>maven-compiler-plugin</artifactId>
-                <version>3.8.0</version>
-            </plugin>
-            <plugin>
-                <artifactId>maven-jar-plugin</artifactId>
-                <version>3.0.2</version>
-            </plugin>
-            <plugin>
-                <artifactId>maven-install-plugin</artifactId>
-                <version>2.5.2</version>
-            </plugin>
-            <plugin>
-                <artifactId>maven-deploy-plugin</artifactId>
-                <version>2.8.2</version>
-            </plugin>
             <plugin>
                 <groupId>org.apache.maven.plugins</groupId>
                 <artifactId>maven-assembly-plugin</artifactId>
-                <version>2.4</version>
                 <configuration>
                     <descriptorRefs>
                         <descriptorRef>jar-with-dependencies</descriptorRef>

@@ -171,22 +130,12 @@ pom_template = """
                 </execution>
             </executions>
         </plugin>
-        <!-- site lifecycle, see https://maven.apache.org/ref/current/maven-core/lifecycles.html#site_Lifecycle -->
-        <plugin>
-            <artifactId>maven-site-plugin</artifactId>
-            <version>3.7.1</version>
-        </plugin>
-        <plugin>
-            <artifactId>maven-project-info-reports-plugin</artifactId>
-            <version>3.0.0</version>
-        </plugin>
         <plugin>
             <groupId>org.apache.maven.plugins</groupId>
             <artifactId>maven-surefire-plugin</artifactId>
-            <version>2.22.1</version>
             <configuration>
                 <dependenciesToScan>
-                    <dependency>ml.dmlc:xgboost4j_2.12</dependency>
+                    <dependency>ml.dmlc:xgboost4j_${{scala.binary.version}}</dependency>
                 </dependenciesToScan>
             </configuration>
         </plugin>
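Since the tester pom is regenerated from this template for each Scala binary version, the produced `xgboost4j-tester/pom.xml` is now gitignored (see the `.gitignore` change above).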
xgboost4j-tester/src/test/java/ml/dmlc/xgboost4j/tester/AppTest.java (deleted)

@@ -1,20 +0,0 @@
-package ml.dmlc.xgboost4j.tester;
-
-import static org.junit.Assert.assertTrue;
-
-import org.junit.Test;
-
-/**
- * Unit test for simple App.
- */
-public class AppTest
-{
-    /**
-     * Rigorous Test :-)
-     */
-    @Test
-    public void shouldAnswerWithTrue()
-    {
-        assertTrue( true );
-    }
-}
xgboost4j/pom.xml

@@ -5,10 +5,11 @@
     <modelVersion>4.0.0</modelVersion>
     <parent>
         <groupId>ml.dmlc</groupId>
-        <artifactId>xgboost-jvm_2.12</artifactId>
+        <artifactId>xgboost-jvm</artifactId>
         <version>2.0.0-SNAPSHOT</version>
     </parent>
-    <artifactId>xgboost4j_2.12</artifactId>
+    <name>xgboost4j</name>
+    <artifactId>xgboost4j_${scala.binary.version}</artifactId>
     <version>2.0.0-SNAPSHOT</version>
     <packaging>jar</packaging>
 

@@ -28,13 +29,13 @@
         <dependency>
             <groupId>junit</groupId>
             <artifactId>junit</artifactId>
-            <version>4.13.2</version>
+            <version>${junit.version}</version>
             <scope>test</scope>
         </dependency>
         <dependency>
             <groupId>org.scalatest</groupId>
             <artifactId>scalatest_${scala.binary.version}</artifactId>
-            <version>3.2.16</version>
+            <version>${scalatest.version}</version>
             <scope>provided</scope>
         </dependency>
     </dependencies>
EvalTrait.scala

@@ -37,7 +37,7 @@ trait EvalTrait extends IEvaluation {
     */
   def eval(predicts: Array[Array[Float]], dmat: DMatrix): Float
 
-  private[scala] def eval(predicts: Array[Array[Float]], jdmat: java.DMatrix): Float = {
+  def eval(predicts: Array[Array[Float]], jdmat: java.DMatrix): Float = {
     require(predicts.length == jdmat.getLabel.length, "predicts size and label size must match " +
       s" predicts size: ${predicts.length}, label size: ${jdmat.getLabel.length}")
     eval(predicts, new DMatrix(jdmat))
ObjectiveTrait.scala

@@ -31,7 +31,7 @@ trait ObjectiveTrait extends IObjective {
     */
   def getGradient(predicts: Array[Array[Float]], dtrain: DMatrix): List[Array[Float]]
 
-  private[scala] def getGradient(predicts: Array[Array[Float]], dtrain: JDMatrix):
+  def getGradient(predicts: Array[Array[Float]], dtrain: JDMatrix):
       java.util.List[Array[Float]] = {
     getGradient(predicts, new DMatrix(dtrain)).asJava
   }
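In both traits the Java-facing overload implements a method of the Java interface (`IEvaluation` / `IObjective`); dropping `private[scala]` makes the implementing member plainly public, presumably because Scala 2.13 is stricter about implementing a public Java interface method with a visibility-restricted one.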
XGBoost.scala (ml.dmlc.xgboost4j.scala)

@@ -17,12 +17,11 @@
 package ml.dmlc.xgboost4j.scala
 
 import java.io.InputStream
+import ml.dmlc.xgboost4j.java.{XGBoostError, XGBoost => JXGBoost}
 
-import ml.dmlc.xgboost4j.java.{XGBoostError, Booster => JBooster, XGBoost => JXGBoost}
-import scala.collection.JavaConverters._
+import scala.jdk.CollectionConverters._
 
 import org.apache.hadoop.conf.Configuration
-import org.apache.hadoop.fs.{FileSystem, Path}
+import org.apache.hadoop.fs.Path
 
 /**
   * XGBoost Scala Training function.

@@ -40,7 +39,12 @@ object XGBoost {
       earlyStoppingRound: Int = 0,
       prevBooster: Booster,
       checkpointParams: Option[ExternalCheckpointParams]): Booster = {
-    val jWatches = watches.mapValues(_.jDMatrix).asJava
+
+    // we have to filter null value for customized obj and eval
+    val jParams: java.util.Map[String, AnyRef] =
+      params.filter(_._2 != null).mapValues(_.toString.asInstanceOf[AnyRef]).toMap.asJava
+
+    val jWatches = watches.mapValues(_.jDMatrix).toMap.asJava
     val jBooster = if (prevBooster == null) {
       null
     } else {

@@ -51,8 +55,7 @@ object XGBoost {
       map(cp => {
         JXGBoost.trainAndSaveCheckpoint(
           dtrain.jDMatrix,
-          // we have to filter null value for customized obj and eval
-          params.filter(_._2 != null).mapValues(_.toString.asInstanceOf[AnyRef]).asJava,
+          jParams,
           numRounds, jWatches, metrics, obj, eval, earlyStoppingRound, jBooster,
           cp.checkpointInterval,
           cp.checkpointPath,

@@ -61,8 +64,7 @@ object XGBoost {
       getOrElse(
         JXGBoost.train(
           dtrain.jDMatrix,
-          // we have to filter null value for customized obj and eval
-          params.filter(_._2 != null).mapValues(_.toString.asInstanceOf[AnyRef]).asJava,
+          jParams,
           numRounds, jWatches, metrics, obj, eval, earlyStoppingRound, jBooster)
       )
     if (prevBooster == null) {
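The `.toMap` added after `mapValues` is the 2.13-critical piece: on Scala 2.13, `mapValues` returns a lazy `MapView` rather than a `Map`, and views carry no `asJava` enrichment. A standalone sketch of the pattern (illustrative values only):

```scala
import scala.jdk.CollectionConverters._

val watches = Map("train" -> "d1", "test" -> "d2")
// 2.12: mapValues yields a Map; 2.13: a MapView.
// .toMap materializes the view so .asJava compiles under both versions.
val jWatches: java.util.Map[String, String] =
  watches.mapValues(_.toUpperCase).toMap.asJava
```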
tests/buildkite/build-jvm-packages.sh

@@ -4,11 +4,18 @@ set -euo pipefail
 
 source tests/buildkite/conftest.sh
 
-echo "--- Build XGBoost JVM packages"
+echo "--- Build XGBoost JVM packages scala 2.12"
 tests/ci_build/ci_build.sh jvm docker tests/ci_build/build_jvm_packages.sh \
   ${SPARK_VERSION}
 
+echo "--- Build XGBoost JVM packages scala 2.13"
+
+tests/ci_build/ci_build.sh jvm docker tests/ci_build/build_jvm_packages.sh \
+  ${SPARK_VERSION} "" "" "true"
+
 echo "--- Stash XGBoost4J JARs"
 buildkite-agent artifact upload "jvm-packages/xgboost4j/target/*.jar"
 buildkite-agent artifact upload "jvm-packages/xgboost4j-spark/target/*.jar"
+buildkite-agent artifact upload "jvm-packages/xgboost4j-flink/target/*.jar"
 buildkite-agent artifact upload "jvm-packages/xgboost4j-example/target/*.jar"
tests/buildkite/conftest.sh

@@ -25,7 +25,7 @@ set -x
 CUDA_VERSION=11.8.0
 NCCL_VERSION=2.16.5-1
 RAPIDS_VERSION=23.02
-SPARK_VERSION=3.1.1
+SPARK_VERSION=3.4.0
 JDK_VERSION=8
 
 if [[ -z ${BUILDKITE:-} ]]
tests/ci_build/Dockerfile.jvm

@@ -20,10 +20,14 @@ RUN \
     wget -nv https://archive.apache.org/dist/maven/maven-3/3.6.1/binaries/apache-maven-3.6.1-bin.tar.gz && \
     tar xvf apache-maven-3.6.1-bin.tar.gz -C /opt && \
     ln -s /opt/apache-maven-3.6.1/ /opt/maven && \
-    # Spark
-    wget -nv https://archive.apache.org/dist/spark/spark-$SPARK_VERSION/spark-$SPARK_VERSION-bin-hadoop2.7.tgz && \
-    tar xvf spark-$SPARK_VERSION-bin-hadoop2.7.tgz -C /opt && \
-    ln -s /opt/spark-$SPARK_VERSION-bin-hadoop2.7 /opt/spark
+    # Spark with scala 2.12
+    mkdir -p /opt/spark-scala-2.12 && \
+    wget -nv https://archive.apache.org/dist/spark/spark-$SPARK_VERSION/spark-$SPARK_VERSION-bin-hadoop3.tgz && \
+    tar xvf spark-$SPARK_VERSION-bin-hadoop3.tgz --strip-components=1 -C /opt/spark-scala-2.12 && \
+    # Spark with scala 2.13
+    mkdir -p /opt/spark-scala-2.13 && \
+    wget -nv https://archive.apache.org/dist/spark/spark-$SPARK_VERSION/spark-$SPARK_VERSION-bin-hadoop3-scala2.13.tgz && \
+    tar xvf spark-$SPARK_VERSION-bin-hadoop3-scala2.13.tgz --strip-components=1 -C /opt/spark-scala-2.13
 
 ENV PATH=/opt/mambaforge/bin:/opt/spark/bin:/opt/maven/bin:$PATH
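The CI image now keeps two full Spark distributions side by side, `/opt/spark-scala-2.12` and `/opt/spark-scala-2.13`, so the cross-test script below can `spark-submit` the tester jar against the Spark build whose Scala version matches the artifacts under test.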
tests/ci_build/build_jvm_packages.sh

@@ -6,6 +6,7 @@ set -x
 spark_version=$1
 use_cuda=$2
 gpu_arch=$3
+use_scala213=$4
 
 gpu_options=""
 if [ "x$use_cuda" == "x-Duse.cuda=ON" ]; then

@@ -22,7 +23,13 @@ export RABIT_MOCK=ON
 if [ "x$gpu_arch" != "x" ]; then
   export GPU_ARCH_FLAG=$gpu_arch
 fi
-mvn --no-transfer-progress package -Dspark.version=${spark_version} $gpu_options
+
+mvn_profile_string=""
+if [ "x$use_scala213" != "x" ]; then
+  export mvn_profile_string="-Pdefault,scala-2.13"
+fi
+
+mvn --no-transfer-progress package $mvn_profile_string -Dspark.version=${spark_version} $gpu_options
 
 set +x
 set +e
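The new optional fourth argument keeps existing call sites unchanged: `build_jvm_packages.sh ${SPARK_VERSION}` still builds 2.12 only, while the Buildkite pipeline above passes `${SPARK_VERSION} "" "" "true"` to add a 2.13 pass.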
tests/ci_build/test_jvm_cross.sh

@@ -6,37 +6,56 @@ set -x
 # Initialize local Maven repository
 ./tests/ci_build/initialize_maven.sh
 
-# Get version number of XGBoost4J and other auxiliary information
 cd jvm-packages
+jvm_packages_dir=`pwd`
+# Get version number of XGBoost4J and other auxiliary information
 xgboost4j_version=$(mvn help:evaluate -Dexpression=project.version -q -DforceStdout)
 maven_compiler_source=$(mvn help:evaluate -Dexpression=maven.compiler.source -q -DforceStdout)
 maven_compiler_target=$(mvn help:evaluate -Dexpression=maven.compiler.target -q -DforceStdout)
 spark_version=$(mvn help:evaluate -Dexpression=spark.version -q -DforceStdout)
-scala_version=$(mvn help:evaluate -Dexpression=scala.version -q -DforceStdout)
-scala_binary_version=$(mvn help:evaluate -Dexpression=scala.binary.version -q -DforceStdout)
 
-# Install XGBoost4J JAR into local Maven repository
-mvn --no-transfer-progress install:install-file -Dfile=./xgboost4j/target/xgboost4j_${scala_binary_version}-${xgboost4j_version}.jar -DgroupId=ml.dmlc -DartifactId=xgboost4j_${scala_binary_version} -Dversion=${xgboost4j_version} -Dpackaging=jar
-mvn --no-transfer-progress install:install-file -Dfile=./xgboost4j/target/xgboost4j_${scala_binary_version}-${xgboost4j_version}-tests.jar -DgroupId=ml.dmlc -DartifactId=xgboost4j_${scala_binary_version} -Dversion=${xgboost4j_version} -Dpackaging=test-jar -Dclassifier=tests
-mvn --no-transfer-progress install:install-file -Dfile=./xgboost4j-spark/target/xgboost4j-spark_${scala_binary_version}-${xgboost4j_version}.jar -DgroupId=ml.dmlc -DartifactId=xgboost4j-spark_${scala_binary_version} -Dversion=${xgboost4j_version} -Dpackaging=jar
-mvn --no-transfer-progress install:install-file -Dfile=./xgboost4j-example/target/xgboost4j-example_${scala_binary_version}-${xgboost4j_version}.jar -DgroupId=ml.dmlc -DartifactId=xgboost4j-example_${scala_binary_version} -Dversion=${xgboost4j_version} -Dpackaging=jar
-
-cd xgboost4j-tester
-# Generate pom.xml for XGBoost4J-tester, a dummy project to run XGBoost4J tests
-python3 ./generate_pom.py ${xgboost4j_version} ${maven_compiler_source} ${maven_compiler_target} ${spark_version} ${scala_version} ${scala_binary_version}
-# Run unit tests with XGBoost4J
-mvn --no-transfer-progress package
-
-# Run integration tests with XGBoost4J
-java -jar ./target/xgboost4j-tester_${scala_binary_version}-1.0-SNAPSHOT-jar-with-dependencies.jar
-
-# Run integration tests with XGBoost4J-Spark
-if [ ! -z "$RUN_INTEGRATION_TEST" ]
-then
+if [ ! -z "$RUN_INTEGRATION_TEST" ]; then
+  cd $jvm_packages_dir/xgboost4j-tester
   python3 get_iris.py
-  spark-submit --class ml.dmlc.xgboost4j.scala.example.spark.SparkTraining --master 'local[8]' ./target/xgboost4j-tester_${scala_binary_version}-1.0-SNAPSHOT-jar-with-dependencies.jar ${PWD}/iris.csv
-  spark-submit --class ml.dmlc.xgboost4j.scala.example.spark.SparkMLlibPipeline --master 'local[8]' ./target/xgboost4j-tester_${scala_binary_version}-1.0-SNAPSHOT-jar-with-dependencies.jar ${PWD}/iris.csv ${PWD}/native_model ${PWD}/pipeline_model
+  cd $jvm_packages_dir
 fi
+
+# including maven profiles for different scala versions: 2.12 is the default at the moment.
+for _maven_profile_string in "" "-Pdefault,scala-2.13"; do
+  scala_version=$(mvn help:evaluate $_maven_profile_string -Dexpression=scala.version -q -DforceStdout)
+  scala_binary_version=$(mvn help:evaluate $_maven_profile_string -Dexpression=scala.binary.version -q -DforceStdout)
+
+  # Install XGBoost4J JAR into local Maven repository
+  mvn --no-transfer-progress install:install-file -Dfile=./xgboost4j/target/xgboost4j_${scala_binary_version}-${xgboost4j_version}.jar -DgroupId=ml.dmlc -DartifactId=xgboost4j_${scala_binary_version} -Dversion=${xgboost4j_version} -Dpackaging=jar
+  mvn --no-transfer-progress install:install-file -Dfile=./xgboost4j/target/xgboost4j_${scala_binary_version}-${xgboost4j_version}-tests.jar -DgroupId=ml.dmlc -DartifactId=xgboost4j_${scala_binary_version} -Dversion=${xgboost4j_version} -Dpackaging=test-jar -Dclassifier=tests
+  mvn --no-transfer-progress install:install-file -Dfile=./xgboost4j-spark/target/xgboost4j-spark_${scala_binary_version}-${xgboost4j_version}.jar -DgroupId=ml.dmlc -DartifactId=xgboost4j-spark_${scala_binary_version} -Dversion=${xgboost4j_version} -Dpackaging=jar
+  mvn --no-transfer-progress install:install-file -Dfile=./xgboost4j-example/target/xgboost4j-example_${scala_binary_version}-${xgboost4j_version}.jar -DgroupId=ml.dmlc -DartifactId=xgboost4j-example_${scala_binary_version} -Dversion=${xgboost4j_version} -Dpackaging=jar
+
+  cd xgboost4j-tester
+  # Generate pom.xml for XGBoost4J-tester, a dummy project to run XGBoost4J tests
+  python3 ./generate_pom.py ${xgboost4j_version} ${maven_compiler_source} ${maven_compiler_target} ${spark_version} ${scala_version} ${scala_binary_version}
+  # Build package and unit tests with XGBoost4J
+  mvn --no-transfer-progress clean package
+  xgboost4j_tester_jar="$jvm_packages_dir/xgboost4j-tester/target/xgboost4j-tester_${scala_binary_version}-1.0-SNAPSHOT-jar-with-dependencies.jar"
+  # Run integration tests with XGBoost4J
+  java -jar $xgboost4j_tester_jar
+
+  # Run integration tests with XGBoost4J-Spark
+  if [ ! -z "$RUN_INTEGRATION_TEST" ]; then
+    # Changing directory so that we do not mix code and resulting files
+    cd target
+    if [[ "$scala_binary_version" == "2.12" ]]; then
+      /opt/spark-scala-2.12/bin/spark-submit --class ml.dmlc.xgboost4j.scala.example.spark.SparkTraining --master 'local[8]' ${xgboost4j_tester_jar} $jvm_packages_dir/xgboost4j-tester/iris.csv
+      /opt/spark-scala-2.12/bin/spark-submit --class ml.dmlc.xgboost4j.scala.example.spark.SparkMLlibPipeline --master 'local[8]' ${xgboost4j_tester_jar} $jvm_packages_dir/xgboost4j-tester/iris.csv ${PWD}/native_model-${scala_version} ${PWD}/pipeline_model-${scala_version}
+    elif [[ "$scala_binary_version" == "2.13" ]]; then
+      /opt/spark-scala-2.13/bin/spark-submit --class ml.dmlc.xgboost4j.scala.example.spark.SparkTraining --master 'local[8]' ${xgboost4j_tester_jar} $jvm_packages_dir/xgboost4j-tester/iris.csv
+      /opt/spark-scala-2.13/bin/spark-submit --class ml.dmlc.xgboost4j.scala.example.spark.SparkMLlibPipeline --master 'local[8]' ${xgboost4j_tester_jar} $jvm_packages_dir/xgboost4j-tester/iris.csv ${PWD}/native_model-${scala_version} ${PWD}/pipeline_model-${scala_version}
+    else
+      echo "Unexpected scala version: $scala_version ($scala_binary_version)."
+    fi
+  fi
+  cd $jvm_packages_dir
+done
 
 set +x
 set +e