[jvm-packages] unify setFeaturesCol API for XGBoostRegressor (#7784)

This commit is contained in:
Bobby Wang 2022-04-05 13:35:33 +08:00 committed by GitHub
parent e5ab8f3ebe
commit 2454407f3a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 13 additions and 11 deletions

View File

@ -112,7 +112,7 @@ private[spark] object GpuUtils {
val msg = if (fitting) "train" else "transform"
// feature columns
require(featureNames.nonEmpty, s"Gpu $msg requires features columns. " +
"please refer to setFeaturesCols!")
"please refer to `setFeaturesCol(value: Array[String])`!")
featureNames.foreach(fn => checkNumericType(schema, fn))
if (fitting) {
require(labelName.nonEmpty, "label column is not set.")

View File

@ -147,12 +147,13 @@ class GpuXGBoostClassifierSuite extends GpuTestSuite {
.csv(dataPath).randomSplit(Array(0.7, 0.3), seed = 1)
// Since CPU model does not know the information about the features cols that GPU transform
// pipeline requires. End user needs to setFeaturesCols in the model manually
// pipeline requires. End user needs to call setFeaturesCol(value: Array[String]) on the
// model manually
val thrown = intercept[IllegalArgumentException](cpuModel
.transform(testDf)
.collect())
assert(thrown.getMessage.contains("Gpu transform requires features columns. " +
"please refer to setFeaturesCols"))
"please refer to `setFeaturesCol(value: Array[String])`"))
val left = cpuModel
.setFeaturesCol(featureNames)

View File

@ -86,7 +86,7 @@ class GpuXGBoostRegressorSuite extends GpuTestSuite {
.csv(getResourcePath("/rank.train.csv")).randomSplit(Array(0.7, 0.3), seed = 1)
val classifier = new XGBoostRegressor(xgbParam)
.setFeaturesCols(featureNames)
.setFeaturesCol(featureNames)
.setLabelCol(labelName)
.setTreeMethod("gpu_hist")
(classifier.fit(rawInput), testDf)
@ -143,20 +143,21 @@ class GpuXGBoostRegressorSuite extends GpuTestSuite {
.csv(getResourcePath("/rank.train.csv")).randomSplit(Array(0.7, 0.3), seed = 1)
// Since CPU model does not know the information about the features cols that GPU transform
// pipeline requires. End user needs to setFeaturesCols in the model manually
// pipeline requires. End user needs to call setFeaturesCol(value: Array[String]) on the
// model manually
val thrown = intercept[IllegalArgumentException](cpuModel
.transform(testDf)
.collect())
assert(thrown.getMessage.contains("Gpu transform requires features columns. " +
"please refer to setFeaturesCols"))
"please refer to `setFeaturesCol(value: Array[String])`"))
val left = cpuModel
.setFeaturesCols(featureNames)
.setFeaturesCol(featureNames)
.transform(testDf)
.collect()
val right = cpuModelFromFile
.setFeaturesCols(featureNames)
.setFeaturesCol(featureNames)
.transform(testDf)
.collect()
@ -173,7 +174,7 @@ class GpuXGBoostRegressorSuite extends GpuTestSuite {
.csv(getResourcePath("/rank.train.csv")).randomSplit(Array(0.7, 0.3), seed = 1)
val classifier = new XGBoostRegressor(xgbParam)
.setFeaturesCols(featureNames)
.setFeaturesCol(featureNames)
.setLabelCol(labelName)
.setTreeMethod("gpu_hist")
classifier.fit(rawInput)

View File

@ -150,7 +150,7 @@ class XGBoostRegressor (
* This API is only used in the GPU train pipeline of xgboost4j-spark-gpu, which requires
* all feature columns to be numeric types.
*/
def setFeaturesCols(value: Array[String]): this.type =
def setFeaturesCol(value: Array[String]): this.type =
set(featuresCols, value)
// called at the start of fit/train when 'eval_metric' is not defined
@ -257,7 +257,7 @@ class XGBoostRegressionModel private[ml] (
* This API is only used in the GPU train pipeline of xgboost4j-spark-gpu, which requires
* all feature columns to be numeric types.
*/
def setFeaturesCols(value: Array[String]): this.type =
def setFeaturesCol(value: Array[String]): this.type =
set(featuresCols, value)
/**