From 2454407f3a5bac21322d79e440fa1e0c31fbb21d Mon Sep 17 00:00:00 2001 From: Bobby Wang Date: Tue, 5 Apr 2022 13:35:33 +0800 Subject: [PATCH] [jvm-packages] unify setFeaturesCol API for XGBoostRegressor (#7784) --- .../xgboost4j/scala/rapids/spark/GpuUtils.scala | 2 +- .../rapids/spark/GpuXGBoostClassifierSuite.scala | 5 +++-- .../rapids/spark/GpuXGBoostRegressorSuite.scala | 13 +++++++------ .../xgboost4j/scala/spark/XGBoostRegressor.scala | 4 ++-- 4 files changed, 13 insertions(+), 11 deletions(-) diff --git a/jvm-packages/xgboost4j-spark-gpu/src/main/scala/ml/dmlc/xgboost4j/scala/rapids/spark/GpuUtils.scala b/jvm-packages/xgboost4j-spark-gpu/src/main/scala/ml/dmlc/xgboost4j/scala/rapids/spark/GpuUtils.scala index c48f39f32..fdd1061a7 100644 --- a/jvm-packages/xgboost4j-spark-gpu/src/main/scala/ml/dmlc/xgboost4j/scala/rapids/spark/GpuUtils.scala +++ b/jvm-packages/xgboost4j-spark-gpu/src/main/scala/ml/dmlc/xgboost4j/scala/rapids/spark/GpuUtils.scala @@ -112,7 +112,7 @@ private[spark] object GpuUtils { val msg = if (fitting) "train" else "transform" // feature columns require(featureNames.nonEmpty, s"Gpu $msg requires features columns. " + - "please refer to setFeaturesCols!") + "please refer to `setFeaturesCol(value: Array[String])`!") featureNames.foreach(fn => checkNumericType(schema, fn)) if (fitting) { require(labelName.nonEmpty, "label column is not set.") diff --git a/jvm-packages/xgboost4j-spark-gpu/src/test/scala/ml/dmlc/xgboost4j/scala/rapids/spark/GpuXGBoostClassifierSuite.scala b/jvm-packages/xgboost4j-spark-gpu/src/test/scala/ml/dmlc/xgboost4j/scala/rapids/spark/GpuXGBoostClassifierSuite.scala index 9bed82072..6ff1947b3 100644 --- a/jvm-packages/xgboost4j-spark-gpu/src/test/scala/ml/dmlc/xgboost4j/scala/rapids/spark/GpuXGBoostClassifierSuite.scala +++ b/jvm-packages/xgboost4j-spark-gpu/src/test/scala/ml/dmlc/xgboost4j/scala/rapids/spark/GpuXGBoostClassifierSuite.scala @@ -147,12 +147,13 @@ class GpuXGBoostClassifierSuite extends GpuTestSuite { .csv(dataPath).randomSplit(Array(0.7, 0.3), seed = 1) // Since CPU model does not know the information about the features cols that GPU transform - // pipeline requires. End user needs to setFeaturesCols in the model manually + // pipeline requires. End user needs to setFeaturesCol(features: Array[String]) in the model + // manually val thrown = intercept[IllegalArgumentException](cpuModel .transform(testDf) .collect()) assert(thrown.getMessage.contains("Gpu transform requires features columns. " + - "please refer to setFeaturesCols")) + "please refer to `setFeaturesCol(value: Array[String])`")) val left = cpuModel .setFeaturesCol(featureNames) diff --git a/jvm-packages/xgboost4j-spark-gpu/src/test/scala/ml/dmlc/xgboost4j/scala/rapids/spark/GpuXGBoostRegressorSuite.scala b/jvm-packages/xgboost4j-spark-gpu/src/test/scala/ml/dmlc/xgboost4j/scala/rapids/spark/GpuXGBoostRegressorSuite.scala index 18f35ee87..2777c2ea6 100644 --- a/jvm-packages/xgboost4j-spark-gpu/src/test/scala/ml/dmlc/xgboost4j/scala/rapids/spark/GpuXGBoostRegressorSuite.scala +++ b/jvm-packages/xgboost4j-spark-gpu/src/test/scala/ml/dmlc/xgboost4j/scala/rapids/spark/GpuXGBoostRegressorSuite.scala @@ -86,7 +86,7 @@ class GpuXGBoostRegressorSuite extends GpuTestSuite { .csv(getResourcePath("/rank.train.csv")).randomSplit(Array(0.7, 0.3), seed = 1) val classifier = new XGBoostRegressor(xgbParam) - .setFeaturesCols(featureNames) + .setFeaturesCol(featureNames) .setLabelCol(labelName) .setTreeMethod("gpu_hist") (classifier.fit(rawInput), testDf) @@ -143,20 +143,21 @@ class GpuXGBoostRegressorSuite extends GpuTestSuite { .csv(getResourcePath("/rank.train.csv")).randomSplit(Array(0.7, 0.3), seed = 1) // Since CPU model does not know the information about the features cols that GPU transform - // pipeline requires. End user needs to setFeaturesCols in the model manually + // pipeline requires. End user needs to setFeaturesCol(features: Array[String]) in the model + // manually val thrown = intercept[IllegalArgumentException](cpuModel .transform(testDf) .collect()) assert(thrown.getMessage.contains("Gpu transform requires features columns. " + - "please refer to setFeaturesCols")) + "please refer to `setFeaturesCol(value: Array[String])`")) val left = cpuModel - .setFeaturesCols(featureNames) + .setFeaturesCol(featureNames) .transform(testDf) .collect() val right = cpuModelFromFile - .setFeaturesCols(featureNames) + .setFeaturesCol(featureNames) .transform(testDf) .collect() @@ -173,7 +174,7 @@ class GpuXGBoostRegressorSuite extends GpuTestSuite { .csv(getResourcePath("/rank.train.csv")).randomSplit(Array(0.7, 0.3), seed = 1) val classifier = new XGBoostRegressor(xgbParam) - .setFeaturesCols(featureNames) + .setFeaturesCol(featureNames) .setLabelCol(labelName) .setTreeMethod("gpu_hist") classifier.fit(rawInput) diff --git a/jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark/XGBoostRegressor.scala b/jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark/XGBoostRegressor.scala index 3ca1e7988..617aedfad 100644 --- a/jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark/XGBoostRegressor.scala +++ b/jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark/XGBoostRegressor.scala @@ -150,7 +150,7 @@ class XGBoostRegressor ( * This API is only used in GPU train pipeline of xgboost4j-spark-gpu, which requires * all feature columns must be numeric types. */ - def setFeaturesCols(value: Array[String]): this.type = + def setFeaturesCol(value: Array[String]): this.type = set(featuresCols, value) // called at the start of fit/train when 'eval_metric' is not defined @@ -257,7 +257,7 @@ class XGBoostRegressionModel private[ml] ( * This API is only used in GPU train pipeline of xgboost4j-spark-gpu, which requires * all feature columns must be numeric types. */ - def setFeaturesCols(value: Array[String]): this.type = + def setFeaturesCol(value: Array[String]): this.type = set(featuresCols, value) /**