From 003b418312c62ce5f31948afa85f39535a50eaef Mon Sep 17 00:00:00 2001 From: Bobby Wang Date: Tue, 23 Jul 2024 12:15:51 +0800 Subject: [PATCH] [jvm-packages] clean up example (#10618) --- .../scala/example/BasicWalkThrough.scala | 4 ++-- .../scala/example/BoostFromPrediction.scala | 7 ++++--- .../scala/example/CrossValidation.scala | 5 ++--- .../scala/example/CustomObjective.scala | 9 +++++---- .../scala/example/ExternalMemory.scala | 6 +++--- .../scala/example/GeneralizedLinearModel.scala | 5 ++--- .../scala/example/PredictFirstNTree.scala | 4 ++-- .../scala/example/PredictLeafIndices.scala | 8 +++----- .../example/flink/DistTrainWithFlink.scala | 12 +++++++----- .../example/spark/SparkMLlibPipeline.scala | 5 ++--- .../scala/example/spark/SparkTraining.scala | 12 +++++++----- .../scala/example/util/CustomEval.scala | 3 ++- .../flink/DistTrainWithFlinkExampleTest.scala | 5 +++-- .../flink/DistTrainWithFlinkSuite.scala | 7 ++++--- .../example/spark/SparkExamplesTest.scala | 18 ++++++++++-------- 15 files changed, 58 insertions(+), 52 deletions(-) diff --git a/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/BasicWalkThrough.scala b/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/BasicWalkThrough.scala index 1893288b4..4629fa352 100644 --- a/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/BasicWalkThrough.scala +++ b/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/BasicWalkThrough.scala @@ -1,5 +1,5 @@ /* - Copyright (c) 2014-2023 by Contributors + Copyright (c) 2014-2024 by Contributors Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -23,7 +23,7 @@ import scala.collection.mutable import ml.dmlc.xgboost4j.java.{DMatrix => JDMatrix} import ml.dmlc.xgboost4j.java.example.util.DataLoader -import ml.dmlc.xgboost4j.scala.{XGBoost, DMatrix} +import ml.dmlc.xgboost4j.scala.{DMatrix, XGBoost} object BasicWalkThrough { def saveDumpModel(modelPath: String, modelInfos: Array[String]): Unit = { diff --git a/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/BoostFromPrediction.scala b/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/BoostFromPrediction.scala index 09b72fc50..11f024a4d 100644 --- a/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/BoostFromPrediction.scala +++ b/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/BoostFromPrediction.scala @@ -1,5 +1,5 @@ /* - Copyright (c) 2014 by Contributors + Copyright (c) 2014-2024 by Contributors Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -18,7 +18,8 @@ package ml.dmlc.xgboost4j.scala.example import scala.collection.mutable -import ml.dmlc.xgboost4j.scala.{XGBoost, DMatrix} +import ml.dmlc.xgboost4j.scala.{DMatrix, XGBoost} + object BoostFromPrediction { def main(args: Array[String]): Unit = { @@ -48,6 +49,6 @@ object BoostFromPrediction { testMat.setBaseMargin(testPred) System.out.println("result of running from initial prediction") - val booster2 = XGBoost.train(trainMat, params.toMap, 1, watches.toMap, null, null) + XGBoost.train(trainMat, params.toMap, 1, watches.toMap, null, null) } } diff --git a/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/CrossValidation.scala b/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/CrossValidation.scala index 6083209ec..69d0d37fb 100644 --- a/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/CrossValidation.scala +++ b/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/CrossValidation.scala @@ -17,7 +17,7 @@ package ml.dmlc.xgboost4j.scala.example import scala.collection.mutable -import ml.dmlc.xgboost4j.scala.{XGBoost, DMatrix} +import ml.dmlc.xgboost4j.scala.{DMatrix, XGBoost} object CrossValidation { def main(args: Array[String]): Unit = { @@ -40,7 +40,6 @@ object CrossValidation { // set additional eval_metrics val metrics: Array[String] = null - val evalHist: Array[String] = - XGBoost.crossValidation(trainMat, params.toMap, round, nfold, metrics) + XGBoost.crossValidation(trainMat, params.toMap, round, nfold, metrics) } } diff --git a/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/CustomObjective.scala b/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/CustomObjective.scala index 8cc49c90d..dbb49254b 100644 --- a/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/CustomObjective.scala +++ b/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/CustomObjective.scala @@ -1,5 +1,5 @@ /* - Copyright (c) 2014 by Contributors + Copyright (c) 2014-2024 by Contributors Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -18,9 +18,10 @@ package ml.dmlc.xgboost4j.scala.example import scala.collection.mutable import scala.collection.mutable.ListBuffer +import org.apache.commons.logging.{Log, LogFactory} + import ml.dmlc.xgboost4j.java.XGBoostError -import ml.dmlc.xgboost4j.scala.{XGBoost, DMatrix, EvalTrait, ObjectiveTrait} -import org.apache.commons.logging.{LogFactory, Log} +import ml.dmlc.xgboost4j.scala.{DMatrix, EvalTrait, ObjectiveTrait, XGBoost} /** * an example user define objective and eval @@ -150,7 +151,7 @@ object CustomObjective { val round = 2 // train a model - val booster = XGBoost.train(trainMat, params.toMap, round, watches.toMap) + XGBoost.train(trainMat, params.toMap, round, watches.toMap) XGBoost.train(trainMat, params.toMap, round, watches.toMap, obj = new LogRegObj, eval = new EvalError) } diff --git a/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/ExternalMemory.scala b/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/ExternalMemory.scala index c7f3d8bbb..d35715e3c 100644 --- a/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/ExternalMemory.scala +++ b/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/ExternalMemory.scala @@ -1,5 +1,5 @@ /* - Copyright (c) 2014 by Contributors + Copyright (c) 2014-2024 by Contributors Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -18,7 +18,7 @@ package ml.dmlc.xgboost4j.scala.example import scala.collection.mutable -import ml.dmlc.xgboost4j.scala.{XGBoost, DMatrix} +import ml.dmlc.xgboost4j.scala.{DMatrix, XGBoost} object ExternalMemory { def main(args: Array[String]): Unit = { @@ -54,6 +54,6 @@ object ExternalMemory { testMat.setBaseMargin(testPred) System.out.println("result of running from initial prediction") - val booster2 = XGBoost.train(trainMat, params.toMap, 1, watches.toMap) + XGBoost.train(trainMat, params.toMap, 1, watches.toMap) } } diff --git a/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/GeneralizedLinearModel.scala b/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/GeneralizedLinearModel.scala index e370010b6..70897146c 100644 --- a/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/GeneralizedLinearModel.scala +++ b/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/GeneralizedLinearModel.scala @@ -1,5 +1,5 @@ /* - Copyright (c) 2014 by Contributors + Copyright (c) 2014-2024 by Contributors Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -17,7 +17,7 @@ package ml.dmlc.xgboost4j.scala.example import scala.collection.mutable -import ml.dmlc.xgboost4j.scala.{XGBoost, DMatrix} +import ml.dmlc.xgboost4j.scala.{DMatrix, XGBoost} import ml.dmlc.xgboost4j.scala.example.util.CustomEval @@ -51,7 +51,6 @@ object GeneralizedLinearModel { watches += "train" -> trainMat watches += "test" -> testMat - val round = 4 val booster = XGBoost.train(trainMat, params.toMap, 1, watches.toMap) val predicts = booster.predict(testMat) val eval = new CustomEval diff --git a/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/PredictFirstNTree.scala b/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/PredictFirstNTree.scala index 40a5ffc44..2bd6a845d 100644 --- a/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/PredictFirstNTree.scala +++ b/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/PredictFirstNTree.scala @@ -1,5 +1,5 @@ /* - Copyright (c) 2014 by Contributors + Copyright (c) 2014-2024 by Contributors Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -17,8 +17,8 @@ package ml.dmlc.xgboost4j.scala.example import scala.collection.mutable +import ml.dmlc.xgboost4j.scala.{DMatrix, XGBoost} import ml.dmlc.xgboost4j.scala.example.util.CustomEval -import ml.dmlc.xgboost4j.scala.{XGBoost, DMatrix} object PredictFirstNTree { diff --git a/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/PredictLeafIndices.scala b/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/PredictLeafIndices.scala index 7ae2e6520..ca523f175 100644 --- a/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/PredictLeafIndices.scala +++ b/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/PredictLeafIndices.scala @@ -1,5 +1,5 @@ /* - Copyright (c) 2014 by Contributors + Copyright (c) 2014-2024 by Contributors Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -16,11 +16,9 @@ package ml.dmlc.xgboost4j.scala.example -import java.util - import scala.collection.mutable -import ml.dmlc.xgboost4j.scala.{XGBoost, DMatrix} +import ml.dmlc.xgboost4j.scala.{DMatrix, XGBoost} object PredictLeafIndices { @@ -49,7 +47,7 @@ object PredictLeafIndices { // predict all trees val leafIndex2 = booster.predictLeaf(testMat, 0) - for (leafs <- leafIndex) { + for (leafs <- leafIndex2) { println(java.util.Arrays.toString(leafs)) } } diff --git a/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/flink/DistTrainWithFlink.scala b/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/flink/DistTrainWithFlink.scala index 3bfefb841..b8a922572 100644 --- a/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/flink/DistTrainWithFlink.scala +++ b/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/flink/DistTrainWithFlink.scala @@ -1,5 +1,5 @@ /* - Copyright (c) 2014 - 2023 by Contributors + Copyright (c) 2014 - 2024 by Contributors Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -17,12 +17,14 @@ package ml.dmlc.xgboost4j.scala.example.flink import java.lang.{Double => JDouble, Long => JLong} import java.nio.file.{Path, Paths} -import org.apache.flink.api.java.tuple.{Tuple13, Tuple2} -import org.apache.flink.api.java.{DataSet, ExecutionEnvironment} -import org.apache.flink.ml.linalg.{Vector, Vectors} -import ml.dmlc.xgboost4j.java.flink.{XGBoost, XGBoostModel} + import org.apache.flink.api.common.typeinfo.{TypeHint, TypeInformation} +import org.apache.flink.api.java.{DataSet, ExecutionEnvironment} +import org.apache.flink.api.java.tuple.{Tuple13, Tuple2} import org.apache.flink.api.java.utils.DataSetUtils +import org.apache.flink.ml.linalg.{Vector, Vectors} + +import ml.dmlc.xgboost4j.java.flink.{XGBoost, XGBoostModel} object DistTrainWithFlink { diff --git a/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/spark/SparkMLlibPipeline.scala b/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/spark/SparkMLlibPipeline.scala index ae59af571..26a68f085 100644 --- a/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/spark/SparkMLlibPipeline.scala +++ b/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/spark/SparkMLlibPipeline.scala @@ -22,6 +22,7 @@ import org.apache.spark.ml.feature._ import org.apache.spark.ml.tuning._ import org.apache.spark.sql.{DataFrame, SparkSession} import org.apache.spark.sql.types._ + import ml.dmlc.xgboost4j.scala.spark.{XGBoostClassificationModel, XGBoostClassifier} // this example works with Iris dataset (https://archive.ics.uci.edu/ml/datasets/iris) @@ -87,11 +88,9 @@ object SparkMLlibPipeline { "max_depth" -> 2, "objective" -> "multi:softprob", "num_class" -> 3, - "num_round" -> 100, - "num_workers" -> numWorkers, "device" -> device ) - ) + ).setNumRound(10).setNumWorkers(numWorkers) booster.setFeaturesCol("features") booster.setLabelCol("classIndex") val labelConverter = new IndexToString() diff --git a/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/spark/SparkTraining.scala b/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/spark/SparkTraining.scala index 67a9f7e23..e2d49d086 100644 --- a/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/spark/SparkTraining.scala +++ b/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/spark/SparkTraining.scala @@ -16,11 +16,13 @@ package ml.dmlc.xgboost4j.scala.example.spark -import ml.dmlc.xgboost4j.scala.spark.XGBoostClassifier import org.apache.spark.ml.feature.{StringIndexer, VectorAssembler} import org.apache.spark.sql.{DataFrame, SparkSession} import org.apache.spark.sql.types.{DoubleType, StringType, StructField, StructType} +import ml.dmlc.xgboost4j.scala.spark.XGBoostClassifier + + // this example works with Iris dataset (https://archive.ics.uci.edu/ml/datasets/iris) object SparkTraining { @@ -78,13 +80,13 @@ private[spark] def run(spark: SparkSession, inputPath: String, "max_depth" -> 2, "objective" -> "multi:softprob", "num_class" -> 3, - "num_round" -> 100, - "num_workers" -> numWorkers, - "device" -> device, - "eval_sets" -> Map("eval1" -> eval1, "eval2" -> eval2)) + "eval_sets" -> Map("eval1" -> eval1, "eval2" -> eval2), + "device" -> device) val xgbClassifier = new XGBoostClassifier(xgbParam). setFeaturesCol("features"). setLabelCol("classIndex") + .setNumWorkers(numWorkers) + .setNumRound(10) val xgbClassificationModel = xgbClassifier.fit(train) xgbClassificationModel.transform(test) } diff --git a/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/util/CustomEval.scala b/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/util/CustomEval.scala index 6fb233c2a..1b4a8e99a 100644 --- a/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/util/CustomEval.scala +++ b/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/util/CustomEval.scala @@ -15,9 +15,10 @@ */ package ml.dmlc.xgboost4j.scala.example.util +import org.apache.commons.logging.{Log, LogFactory} + import ml.dmlc.xgboost4j.java.XGBoostError import ml.dmlc.xgboost4j.scala.{DMatrix, EvalTrait} -import org.apache.commons.logging.{Log, LogFactory} class CustomEval extends EvalTrait { private val logger: Log = LogFactory.getLog(classOf[CustomEval]) diff --git a/jvm-packages/xgboost4j-example/src/test/scala/ml/dmlc/xgboost4j/java/example/flink/DistTrainWithFlinkExampleTest.scala b/jvm-packages/xgboost4j-example/src/test/scala/ml/dmlc/xgboost4j/java/example/flink/DistTrainWithFlinkExampleTest.scala index b9929639f..cdd5f0803 100644 --- a/jvm-packages/xgboost4j-example/src/test/scala/ml/dmlc/xgboost4j/java/example/flink/DistTrainWithFlinkExampleTest.scala +++ b/jvm-packages/xgboost4j-example/src/test/scala/ml/dmlc/xgboost4j/java/example/flink/DistTrainWithFlinkExampleTest.scala @@ -1,5 +1,5 @@ /* - Copyright (c) 2014-2023 by Contributors + Copyright (c) 2014-2024 by Contributors Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -15,12 +15,13 @@ */ package ml.dmlc.xgboost4j.java.example.flink +import java.nio.file.Paths + import org.apache.flink.api.java.ExecutionEnvironment import org.scalatest.Inspectors._ import org.scalatest.funsuite.AnyFunSuite import org.scalatest.matchers.should.Matchers._ -import java.nio.file.Paths class DistTrainWithFlinkExampleTest extends AnyFunSuite { private val parentPath = Paths.get("../../").resolve("demo").resolve("data") diff --git a/jvm-packages/xgboost4j-example/src/test/scala/ml/dmlc/xgboost4j/scala/example/flink/DistTrainWithFlinkSuite.scala b/jvm-packages/xgboost4j-example/src/test/scala/ml/dmlc/xgboost4j/scala/example/flink/DistTrainWithFlinkSuite.scala index d9e98d81c..cbc424fe4 100644 --- a/jvm-packages/xgboost4j-example/src/test/scala/ml/dmlc/xgboost4j/scala/example/flink/DistTrainWithFlinkSuite.scala +++ b/jvm-packages/xgboost4j-example/src/test/scala/ml/dmlc/xgboost4j/scala/example/flink/DistTrainWithFlinkSuite.scala @@ -15,14 +15,15 @@ */ package ml.dmlc.xgboost4j.scala.example.flink +import java.nio.file.Paths + +import scala.jdk.CollectionConverters._ + import org.apache.flink.api.java.ExecutionEnvironment import org.scalatest.Inspectors._ import org.scalatest.funsuite.AnyFunSuite import org.scalatest.matchers.should.Matchers._ -import java.nio.file.Paths -import scala.jdk.CollectionConverters._ - class DistTrainWithFlinkSuite extends AnyFunSuite { private val parentPath = Paths.get("../../").resolve("demo").resolve("data") private val data = parentPath.resolve("veterans_lung_cancer.csv") diff --git a/jvm-packages/xgboost4j-example/src/test/scala/ml/dmlc/xgboost4j/scala/example/spark/SparkExamplesTest.scala b/jvm-packages/xgboost4j-example/src/test/scala/ml/dmlc/xgboost4j/scala/example/spark/SparkExamplesTest.scala index 2e87bf066..aa8fc4a22 100644 --- a/jvm-packages/xgboost4j-example/src/test/scala/ml/dmlc/xgboost4j/scala/example/spark/SparkExamplesTest.scala +++ b/jvm-packages/xgboost4j-example/src/test/scala/ml/dmlc/xgboost4j/scala/example/spark/SparkExamplesTest.scala @@ -1,5 +1,5 @@ /* - Copyright (c) 2014-2023 by Contributors + Copyright (c) 2014-2024 by Contributors Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -15,16 +15,18 @@ */ package ml.dmlc.xgboost4j.scala.example.spark + +import java.io.File +import java.nio.file.{Files, StandardOpenOption} + +import scala.jdk.CollectionConverters._ +import scala.util.{Random, Try} + import org.apache.spark.sql.SparkSession import org.scalatest.BeforeAndAfterAll import org.scalatest.funsuite.AnyFunSuite import org.slf4j.LoggerFactory -import java.io.File -import java.nio.file.{Files, StandardOpenOption} -import scala.jdk.CollectionConverters._ -import scala.util.{Random, Try} - class SparkExamplesTest extends AnyFunSuite with BeforeAndAfterAll { private val logger = LoggerFactory.getLogger(classOf[SparkExamplesTest]) private val random = new Random(42) @@ -53,7 +55,7 @@ class SparkExamplesTest extends AnyFunSuite with BeforeAndAfterAll { } if (spark == null) { - spark = SparkSession + spark = SparkSession .builder() .appName("XGBoost4J-Spark Pipeline Example") .master(s"local[${numWorkers}]") @@ -92,7 +94,7 @@ class SparkExamplesTest extends AnyFunSuite with BeforeAndAfterAll { e ) true - } + } } private def cleanExternalCache(prefix: String): Unit = {