diff --git a/jvm-packages/xgboost4j-spark-gpu/src/test/scala/ml/dmlc/xgboost4j/scala/rapids/spark/GpuXGBoostClassifierSuite.scala b/jvm-packages/xgboost4j-spark-gpu/src/test/scala/ml/dmlc/xgboost4j/scala/rapids/spark/GpuXGBoostClassifierSuite.scala index fdf82f540..9bed82072 100644 --- a/jvm-packages/xgboost4j-spark-gpu/src/test/scala/ml/dmlc/xgboost4j/scala/rapids/spark/GpuXGBoostClassifierSuite.scala +++ b/jvm-packages/xgboost4j-spark-gpu/src/test/scala/ml/dmlc/xgboost4j/scala/rapids/spark/GpuXGBoostClassifierSuite.scala @@ -1,5 +1,5 @@ /* - Copyright (c) 2021 by Contributors + Copyright (c) 2021-2022 by Contributors Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -39,7 +39,7 @@ class GpuXGBoostClassifierSuite extends GpuTestSuite { StructField("f10", FloatType), StructField("f11", FloatType), StructField("f12", FloatType), StructField(labelName, FloatType) )) - val featureNames = schema.fieldNames.filter(s => !s.equals(labelName)).toSeq + val featureNames = schema.fieldNames.filter(s => !s.equals(labelName)) test("The transform result should be same for several runs on same model") { withGpuSparkSession(enableCsvConf()) { spark => @@ -90,7 +90,7 @@ class GpuXGBoostClassifierSuite extends GpuTestSuite { .csv(dataPath).randomSplit(Array(0.7, 0.3), seed = 1) val classifier = new XGBoostClassifier(xgbParam) - .setFeaturesCols(featureNames) + .setFeaturesCol(featureNames) .setLabelCol(labelName) .setTreeMethod("gpu_hist") (classifier.fit(rawInput), testDf) @@ -155,12 +155,12 @@ class GpuXGBoostClassifierSuite extends GpuTestSuite { "please refer to setFeaturesCols")) val left = cpuModel - .setFeaturesCols(featureNames) + .setFeaturesCol(featureNames) .transform(testDf) .collect() val right = cpuModelFromFile - .setFeaturesCols(featureNames) + .setFeaturesCol(featureNames) .transform(testDf) .collect() @@ -177,7 +177,7 @@ class GpuXGBoostClassifierSuite extends GpuTestSuite { .csv(dataPath).randomSplit(Array(0.7, 0.3), seed = 1) val classifier = new XGBoostClassifier(xgbParam) - .setFeaturesCols(featureNames) + .setFeaturesCol(featureNames) .setLabelCol(labelName) .setTreeMethod("gpu_hist") classifier.fit(rawInput) diff --git a/jvm-packages/xgboost4j-spark-gpu/src/test/scala/ml/dmlc/xgboost4j/scala/rapids/spark/GpuXGBoostGeneralSuite.scala b/jvm-packages/xgboost4j-spark-gpu/src/test/scala/ml/dmlc/xgboost4j/scala/rapids/spark/GpuXGBoostGeneralSuite.scala index f6a45e9f7..53cdcb923 100644 --- a/jvm-packages/xgboost4j-spark-gpu/src/test/scala/ml/dmlc/xgboost4j/scala/rapids/spark/GpuXGBoostGeneralSuite.scala +++ b/jvm-packages/xgboost4j-spark-gpu/src/test/scala/ml/dmlc/xgboost4j/scala/rapids/spark/GpuXGBoostGeneralSuite.scala @@ -1,5 +1,5 @@ /* - Copyright (c) 2021 by Contributors + Copyright (c) 2021-2022 by Contributors Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -28,7 +28,7 @@ class GpuXGBoostGeneralSuite extends GpuTestSuite { private val labelName = "label_col" private val weightName = "weight_col" private val baseMarginName = "margin_col" - private val featureNames = Seq("f1", "f2", "f3") + private val featureNames = Array("f1", "f2", "f3") private val allColumnNames = featureNames :+ weightName :+ baseMarginName :+ labelName private val trainingData = Seq( // f1, f2, f3, weight, margin, label @@ -68,7 +68,7 @@ class GpuXGBoostGeneralSuite extends GpuTestSuite { val xgbParam = Map("eta" -> 0.1f, "max_depth" -> 2, "objective" -> "multi:softprob", "num_class" -> 3, "num_round" -> 5, "num_workers" -> 1, "tree_method" -> "gpu_hist") new XGBoostClassifier(xgbParam) - .setFeaturesCols(featureNames) + .setFeaturesCol(featureNames) .setLabelCol(labelName) .fit(trainingDf) } @@ -84,7 +84,7 @@ class GpuXGBoostGeneralSuite extends GpuTestSuite { "num_class" -> 3, "num_round" -> 5, "num_workers" -> 1, "tree_method" -> "gpu_hist") val thrown1 = intercept[IllegalArgumentException] { new XGBoostClassifier(xgbParam) - .setFeaturesCols(featureNames) + .setFeaturesCol(featureNames) .setLabelCol(labelName) .fit(trainingDf) } @@ -93,7 +93,7 @@ class GpuXGBoostGeneralSuite extends GpuTestSuite { trainingDf = originalDf.withColumn(labelName, col(labelName).cast(StringType)) val thrown2 = intercept[IllegalArgumentException] { new XGBoostClassifier(xgbParam) - .setFeaturesCols(featureNames) + .setFeaturesCol(featureNames) .setLabelCol(labelName) .fit(trainingDf) } @@ -117,7 +117,7 @@ class GpuXGBoostGeneralSuite extends GpuTestSuite { val thrown1 = intercept[IllegalArgumentException] { new XGBoostClassifier(xgbParam) - .setFeaturesCols(featureNames) + .setFeaturesCol(featureNames) .fit(trainingDf) } assert(thrown1.getMessage.contains("label does not exist.")) @@ -132,7 +132,7 @@ class GpuXGBoostGeneralSuite extends GpuTestSuite { "num_class" -> 3, "num_round" -> 5, "num_workers" -> 1, "tree_method" -> "hist") val thrown = intercept[IllegalArgumentException] { new XGBoostClassifier(xgbParam) - .setFeaturesCols(featureNames) + .setFeaturesCol(featureNames) .setLabelCol(labelName) .fit(trainingDf) } @@ -149,7 +149,7 @@ class GpuXGBoostGeneralSuite extends GpuTestSuite { val xgbParam = Map("eta" -> 0.1f, "max_depth" -> 2, "objective" -> "multi:softprob", "num_class" -> 3, "num_round" -> 5, "num_workers" -> 1, "tree_method" -> "gpu_hist") val model1 = new XGBoostClassifier(xgbParam) - .setFeaturesCols(featureNames) + .setFeaturesCol(featureNames) .setLabelCol(labelName) .setEvalSets(Map("eval1" -> eval1, "eval2" -> eval2)) .fit(trainingDf) @@ -166,7 +166,6 @@ class GpuXGBoostGeneralSuite extends GpuTestSuite { test("test persistence of XGBoostClassifier and XGBoostClassificationModel") { val xgbcPath = new File(tempDir.toFile, "xgbc").getPath withGpuSparkSession() { spark => - import spark.implicits._ val xgbParam = Map("eta" -> 0.1f, "max_depth" -> 2, "objective" -> "multi:softprob", "num_class" -> 3, "num_round" -> 5, "num_workers" -> 1, "tree_method" -> "gpu_hist", "features_cols" -> featureNames, "label_col" -> labelName) @@ -174,7 +173,10 @@ class GpuXGBoostGeneralSuite extends GpuTestSuite { xgbc.write.overwrite().save(xgbcPath) val paramMap2 = XGBoostClassifier.load(xgbcPath).MLlib2XGBoostParams xgbParam.foreach { - case (k, v) => assert(v.toString == paramMap2(k).toString) + case (k, v: Array[String]) => + assert(v.sameElements(paramMap2(k).asInstanceOf[Array[String]])) + case (k, v) => + assert(v.toString == paramMap2(k).toString) } } } diff --git a/jvm-packages/xgboost4j-spark-gpu/src/test/scala/ml/dmlc/xgboost4j/scala/rapids/spark/GpuXGBoostRegressorSuite.scala b/jvm-packages/xgboost4j-spark-gpu/src/test/scala/ml/dmlc/xgboost4j/scala/rapids/spark/GpuXGBoostRegressorSuite.scala index ccc7d5d3a..18f35ee87 100644 --- a/jvm-packages/xgboost4j-spark-gpu/src/test/scala/ml/dmlc/xgboost4j/scala/rapids/spark/GpuXGBoostRegressorSuite.scala +++ b/jvm-packages/xgboost4j-spark-gpu/src/test/scala/ml/dmlc/xgboost4j/scala/rapids/spark/GpuXGBoostRegressorSuite.scala @@ -35,7 +35,7 @@ class GpuXGBoostRegressorSuite extends GpuTestSuite { StructField("f3", FloatType), StructField(groupName, IntegerType))) val featureNames = schema.fieldNames.filter(s => - !(s.equals(labelName) || s.equals(groupName))).toSeq + !(s.equals(labelName) || s.equals(groupName))) test("The transform result should be same for several runs on same model") { withGpuSparkSession(enableCsvConf()) { spark => diff --git a/jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark/XGBoostClassifier.scala b/jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark/XGBoostClassifier.scala index a442f739b..4d03b309c 100644 --- a/jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark/XGBoostClassifier.scala +++ b/jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark/XGBoostClassifier.scala @@ -1,5 +1,5 @@ /* - Copyright (c) 2014,2021 by Contributors + Copyright (c) 2014-2022 by Contributors Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -148,7 +148,7 @@ class XGBoostClassifier ( * This API is only used in GPU train pipeline of xgboost4j-spark-gpu, which requires * all feature columns must be numeric types. */ - def setFeaturesCols(value: Seq[String]): this.type = + def setFeaturesCol(value: Array[String]): this.type = set(featuresCols, value) // called at the start of fit/train when 'eval_metric' is not defined @@ -264,7 +264,7 @@ class XGBoostClassificationModel private[ml]( * This API is only used in GPU train pipeline of xgboost4j-spark-gpu, which requires * all feature columns must be numeric types. */ - def setFeaturesCols(value: Seq[String]): this.type = + def setFeaturesCol(value: Array[String]): this.type = set(featuresCols, value) /** diff --git a/jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark/XGBoostRegressor.scala b/jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark/XGBoostRegressor.scala index 799dd31ba..3ca1e7988 100644 --- a/jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark/XGBoostRegressor.scala +++ b/jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark/XGBoostRegressor.scala @@ -1,5 +1,5 @@ /* - Copyright (c) 2014,2021 by Contributors + Copyright (c) 2014-2022 by Contributors Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -150,7 +150,7 @@ class XGBoostRegressor ( * This API is only used in GPU train pipeline of xgboost4j-spark-gpu, which requires * all feature columns must be numeric types. */ - def setFeaturesCols(value: Seq[String]): this.type = + def setFeaturesCols(value: Array[String]): this.type = set(featuresCols, value) // called at the start of fit/train when 'eval_metric' is not defined @@ -257,7 +257,7 @@ class XGBoostRegressionModel private[ml] ( * This API is only used in GPU train pipeline of xgboost4j-spark-gpu, which requires * all feature columns must be numeric types. */ - def setFeaturesCols(value: Seq[String]): this.type = + def setFeaturesCols(value: Array[String]): this.type = set(featuresCols, value) /** diff --git a/jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark/params/GpuParams.scala b/jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark/params/GpuParams.scala index 3c139be09..9ab4c7357 100644 --- a/jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark/params/GpuParams.scala +++ b/jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark/params/GpuParams.scala @@ -1,5 +1,5 @@ /* - Copyright (c) 2021 by Contributors + Copyright (c) 2021-2022 by Contributors Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -16,38 +16,19 @@ package ml.dmlc.xgboost4j.scala.spark.params -import org.json4s.DefaultFormats -import org.json4s.jackson.JsonMethods.{compact, parse, render} - -import org.apache.spark.ml.param.{BooleanParam, Param, Params} +import org.apache.spark.ml.param.{Params, StringArrayParam} trait GpuParams extends Params { /** - * Param for the names of feature columns. + * Param for the names of feature columns for GPU pipeline. * @group param */ - final val featuresCols: StringSeqParam = new StringSeqParam(this, "featuresCols", - "a sequence of feature column names.") + final val featuresCols: StringArrayParam = new StringArrayParam(this, "featuresCols", + "an array of feature column names for GPU pipeline.") - setDefault(featuresCols, Seq.empty[String]) + setDefault(featuresCols, Array.empty[String]) /** @group getParam */ - final def getFeaturesCols: Seq[String] = $(featuresCols) + final def getFeaturesCols: Array[String] = $(featuresCols) } - -class StringSeqParam( - parent: Params, - name: String, - doc: String) extends Param[Seq[String]](parent, name, doc) { - - override def jsonEncode(value: Seq[String]): String = { - import org.json4s.JsonDSL._ - compact(render(value)) - } - - override def jsonDecode(json: String): Seq[String] = { - implicit val formats = DefaultFormats - parse(json).extract[Seq[String]] - } -}