[backport] Backport JVM fixes and document update to 1.6 (#7792)
* [jvm-packages] unify setFeaturesCol API for XGBoostRegressor (#7784)
* [jvm-packages] add doc for xgboost4j-spark-gpu (#7779)
Co-authored-by: Jiaming Yuan <jm.yuan@outlook.com>
* [jvm-packages] remove the dep of com.fasterxml.jackson (#7791)
* [jvm-packages] xgboost4j-spark should work when featuresCols is specified (#7789)
Co-authored-by: Bobby Wang <wbo4958@gmail.com>
This commit is contained in:
@@ -112,7 +112,7 @@ private[spark] object GpuUtils {
|
||||
val msg = if (fitting) "train" else "transform"
|
||||
// feature columns
|
||||
require(featureNames.nonEmpty, s"Gpu $msg requires features columns. " +
|
||||
"please refer to setFeaturesCols!")
|
||||
"please refer to `setFeaturesCol(value: Array[String])`!")
|
||||
featureNames.foreach(fn => checkNumericType(schema, fn))
|
||||
if (fitting) {
|
||||
require(labelName.nonEmpty, "label column is not set.")
|
||||
|
||||
@@ -126,7 +126,7 @@ class GpuXGBoostClassifierSuite extends GpuTestSuite {
|
||||
|
||||
val vectorAssembler = new VectorAssembler()
|
||||
.setHandleInvalid("keep")
|
||||
.setInputCols(featureNames.toArray)
|
||||
.setInputCols(featureNames)
|
||||
.setOutputCol("features")
|
||||
val trainingDf = vectorAssembler.transform(rawInput).select("features", labelName)
|
||||
|
||||
@@ -147,12 +147,12 @@ class GpuXGBoostClassifierSuite extends GpuTestSuite {
|
||||
.csv(dataPath).randomSplit(Array(0.7, 0.3), seed = 1)
|
||||
|
||||
// Since CPU model does not know the information about the features cols that GPU transform
|
||||
// pipeline requires. End user needs to setFeaturesCols in the model manually
|
||||
val thrown = intercept[IllegalArgumentException](cpuModel
|
||||
// pipeline requires. End user needs to setFeaturesCol(features: Array[String]) in the model
|
||||
// manually
|
||||
val thrown = intercept[NoSuchElementException](cpuModel
|
||||
.transform(testDf)
|
||||
.collect())
|
||||
assert(thrown.getMessage.contains("Gpu transform requires features columns. " +
|
||||
"please refer to setFeaturesCols"))
|
||||
assert(thrown.getMessage.contains("Failed to find a default value for featuresCols"))
|
||||
|
||||
val left = cpuModel
|
||||
.setFeaturesCol(featureNames)
|
||||
@@ -195,17 +195,16 @@ class GpuXGBoostClassifierSuite extends GpuTestSuite {
|
||||
val featureColName = "feature_col"
|
||||
val vectorAssembler = new VectorAssembler()
|
||||
.setHandleInvalid("keep")
|
||||
.setInputCols(featureNames.toArray)
|
||||
.setInputCols(featureNames)
|
||||
.setOutputCol(featureColName)
|
||||
val testDf = vectorAssembler.transform(rawInput).select(featureColName, labelName)
|
||||
|
||||
// Since GPU model does not know the information about the features col name that CPU
|
||||
// transform pipeline requires. End user needs to setFeaturesCol in the model manually
|
||||
val thrown = intercept[IllegalArgumentException](
|
||||
intercept[IllegalArgumentException](
|
||||
gpuModel
|
||||
.transform(testDf)
|
||||
.collect())
|
||||
assert(thrown.getMessage.contains("features does not exist"))
|
||||
|
||||
val left = gpuModel
|
||||
.setFeaturesCol(featureColName)
|
||||
|
||||
@@ -108,12 +108,15 @@ class GpuXGBoostGeneralSuite extends GpuTestSuite {
|
||||
val trainingDf = trainingData.toDF(allColumnNames: _*)
|
||||
val xgbParam = Map("eta" -> 0.1f, "max_depth" -> 2, "objective" -> "multi:softprob",
|
||||
"num_class" -> 3, "num_round" -> 5, "num_workers" -> 1, "tree_method" -> "gpu_hist")
|
||||
val thrown = intercept[IllegalArgumentException] {
|
||||
|
||||
// GPU train requires featuresCols. If not specified,
|
||||
// then NoSuchElementException will be thrown
|
||||
val thrown = intercept[NoSuchElementException] {
|
||||
new XGBoostClassifier(xgbParam)
|
||||
.setLabelCol(labelName)
|
||||
.fit(trainingDf)
|
||||
}
|
||||
assert(thrown.getMessage.contains("Gpu train requires features columns."))
|
||||
assert(thrown.getMessage.contains("Failed to find a default value for featuresCols"))
|
||||
|
||||
val thrown1 = intercept[IllegalArgumentException] {
|
||||
new XGBoostClassifier(xgbParam)
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2021 by Contributors
|
||||
Copyright (c) 2021-2022 by Contributors
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
@@ -86,7 +86,7 @@ class GpuXGBoostRegressorSuite extends GpuTestSuite {
|
||||
.csv(getResourcePath("/rank.train.csv")).randomSplit(Array(0.7, 0.3), seed = 1)
|
||||
|
||||
val classifier = new XGBoostRegressor(xgbParam)
|
||||
.setFeaturesCols(featureNames)
|
||||
.setFeaturesCol(featureNames)
|
||||
.setLabelCol(labelName)
|
||||
.setTreeMethod("gpu_hist")
|
||||
(classifier.fit(rawInput), testDf)
|
||||
@@ -122,7 +122,7 @@ class GpuXGBoostRegressorSuite extends GpuTestSuite {
|
||||
|
||||
val vectorAssembler = new VectorAssembler()
|
||||
.setHandleInvalid("keep")
|
||||
.setInputCols(featureNames.toArray)
|
||||
.setInputCols(featureNames)
|
||||
.setOutputCol("features")
|
||||
val trainingDf = vectorAssembler.transform(rawInput).select("features", labelName)
|
||||
|
||||
@@ -143,20 +143,20 @@ class GpuXGBoostRegressorSuite extends GpuTestSuite {
|
||||
.csv(getResourcePath("/rank.train.csv")).randomSplit(Array(0.7, 0.3), seed = 1)
|
||||
|
||||
// Since CPU model does not know the information about the features cols that GPU transform
|
||||
// pipeline requires. End user needs to setFeaturesCols in the model manually
|
||||
val thrown = intercept[IllegalArgumentException](cpuModel
|
||||
// pipeline requires. End user needs to setFeaturesCol(features: Array[String]) in the model
|
||||
// manually
|
||||
val thrown = intercept[NoSuchElementException](cpuModel
|
||||
.transform(testDf)
|
||||
.collect())
|
||||
assert(thrown.getMessage.contains("Gpu transform requires features columns. " +
|
||||
"please refer to setFeaturesCols"))
|
||||
assert(thrown.getMessage.contains("Failed to find a default value for featuresCols"))
|
||||
|
||||
val left = cpuModel
|
||||
.setFeaturesCols(featureNames)
|
||||
.setFeaturesCol(featureNames)
|
||||
.transform(testDf)
|
||||
.collect()
|
||||
|
||||
val right = cpuModelFromFile
|
||||
.setFeaturesCols(featureNames)
|
||||
.setFeaturesCol(featureNames)
|
||||
.transform(testDf)
|
||||
.collect()
|
||||
|
||||
@@ -173,7 +173,7 @@ class GpuXGBoostRegressorSuite extends GpuTestSuite {
|
||||
.csv(getResourcePath("/rank.train.csv")).randomSplit(Array(0.7, 0.3), seed = 1)
|
||||
|
||||
val classifier = new XGBoostRegressor(xgbParam)
|
||||
.setFeaturesCols(featureNames)
|
||||
.setFeaturesCol(featureNames)
|
||||
.setLabelCol(labelName)
|
||||
.setTreeMethod("gpu_hist")
|
||||
classifier.fit(rawInput)
|
||||
@@ -191,17 +191,16 @@ class GpuXGBoostRegressorSuite extends GpuTestSuite {
|
||||
val featureColName = "feature_col"
|
||||
val vectorAssembler = new VectorAssembler()
|
||||
.setHandleInvalid("keep")
|
||||
.setInputCols(featureNames.toArray)
|
||||
.setInputCols(featureNames)
|
||||
.setOutputCol(featureColName)
|
||||
val testDf = vectorAssembler.transform(rawInput).select(featureColName, labelName)
|
||||
|
||||
// Since GPU model does not know the information about the features col name that CPU
|
||||
// transform pipeline requires. End user needs to setFeaturesCol in the model manually
|
||||
val thrown = intercept[IllegalArgumentException](
|
||||
intercept[IllegalArgumentException](
|
||||
gpuModel
|
||||
.transform(testDf)
|
||||
.collect())
|
||||
assert(thrown.getMessage.contains("features does not exist"))
|
||||
|
||||
val left = gpuModel
|
||||
.setFeaturesCol(featureColName)
|
||||
|
||||
Reference in New Issue
Block a user