[jvm-packages] xgboost4j-spark should work when featuresCols is specified (#7789)

This commit is contained in:
Bobby Wang
2022-04-08 13:21:04 +08:00
committed by GitHub
parent 729d227b89
commit 118192f116
12 changed files with 377 additions and 109 deletions

View File

@@ -126,7 +126,7 @@ class GpuXGBoostClassifierSuite extends GpuTestSuite {
val vectorAssembler = new VectorAssembler()
.setHandleInvalid("keep")
.setInputCols(featureNames.toArray)
.setInputCols(featureNames)
.setOutputCol("features")
val trainingDf = vectorAssembler.transform(rawInput).select("features", labelName)
@@ -149,11 +149,10 @@ class GpuXGBoostClassifierSuite extends GpuTestSuite {
// Since CPU model does not know the information about the features cols that GPU transform
// pipeline requires. End user needs to setFeaturesCol(features: Array[String]) in the model
// manually
val thrown = intercept[IllegalArgumentException](cpuModel
val thrown = intercept[NoSuchElementException](cpuModel
.transform(testDf)
.collect())
assert(thrown.getMessage.contains("Gpu transform requires features columns. " +
"please refer to `setFeaturesCol(value: Array[String])`"))
assert(thrown.getMessage.contains("Failed to find a default value for featuresCols"))
val left = cpuModel
.setFeaturesCol(featureNames)
@@ -196,17 +195,16 @@ class GpuXGBoostClassifierSuite extends GpuTestSuite {
val featureColName = "feature_col"
val vectorAssembler = new VectorAssembler()
.setHandleInvalid("keep")
.setInputCols(featureNames.toArray)
.setInputCols(featureNames)
.setOutputCol(featureColName)
val testDf = vectorAssembler.transform(rawInput).select(featureColName, labelName)
// Since GPU model does not know the information about the features col name that CPU
// transform pipeline requires. End user needs to setFeaturesCol in the model manually
val thrown = intercept[IllegalArgumentException](
intercept[IllegalArgumentException](
gpuModel
.transform(testDf)
.collect())
assert(thrown.getMessage.contains("features does not exist"))
val left = gpuModel
.setFeaturesCol(featureColName)

View File

@@ -108,12 +108,15 @@ class GpuXGBoostGeneralSuite extends GpuTestSuite {
val trainingDf = trainingData.toDF(allColumnNames: _*)
val xgbParam = Map("eta" -> 0.1f, "max_depth" -> 2, "objective" -> "multi:softprob",
"num_class" -> 3, "num_round" -> 5, "num_workers" -> 1, "tree_method" -> "gpu_hist")
val thrown = intercept[IllegalArgumentException] {
// GPU train requires featuresCols. If not specified,
// then NoSuchElementException will be thrown
val thrown = intercept[NoSuchElementException] {
new XGBoostClassifier(xgbParam)
.setLabelCol(labelName)
.fit(trainingDf)
}
assert(thrown.getMessage.contains("Gpu train requires features columns."))
assert(thrown.getMessage.contains("Failed to find a default value for featuresCols"))
val thrown1 = intercept[IllegalArgumentException] {
new XGBoostClassifier(xgbParam)

View File

@@ -1,5 +1,5 @@
/*
Copyright (c) 2021 by Contributors
Copyright (c) 2021-2022 by Contributors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -122,7 +122,7 @@ class GpuXGBoostRegressorSuite extends GpuTestSuite {
val vectorAssembler = new VectorAssembler()
.setHandleInvalid("keep")
.setInputCols(featureNames.toArray)
.setInputCols(featureNames)
.setOutputCol("features")
val trainingDf = vectorAssembler.transform(rawInput).select("features", labelName)
@@ -145,11 +145,10 @@ class GpuXGBoostRegressorSuite extends GpuTestSuite {
// Since CPU model does not know the information about the features cols that GPU transform
// pipeline requires. End user needs to setFeaturesCol(features: Array[String]) in the model
// manually
val thrown = intercept[IllegalArgumentException](cpuModel
val thrown = intercept[NoSuchElementException](cpuModel
.transform(testDf)
.collect())
assert(thrown.getMessage.contains("Gpu transform requires features columns. " +
"please refer to `setFeaturesCol(value: Array[String])`"))
assert(thrown.getMessage.contains("Failed to find a default value for featuresCols"))
val left = cpuModel
.setFeaturesCol(featureNames)
@@ -192,17 +191,16 @@ class GpuXGBoostRegressorSuite extends GpuTestSuite {
val featureColName = "feature_col"
val vectorAssembler = new VectorAssembler()
.setHandleInvalid("keep")
.setInputCols(featureNames.toArray)
.setInputCols(featureNames)
.setOutputCol(featureColName)
val testDf = vectorAssembler.transform(rawInput).select(featureColName, labelName)
// Since GPU model does not know the information about the features col name that CPU
// transform pipeline requires. End user needs to setFeaturesCol in the model manually
val thrown = intercept[IllegalArgumentException](
intercept[IllegalArgumentException](
gpuModel
.transform(testDf)
.collect())
assert(thrown.getMessage.contains("features does not exist"))
val left = gpuModel
.setFeaturesCol(featureColName)