Add new parameter singlePrecisionHistogram to xgboost4j-spark (#5811)

Expose the existing 'singlePrecisionHistogram' param to the Spark layer.
This commit is contained in:
Zhang Zhang 2020-07-08 16:29:35 -07:00 committed by GitHub
parent 0d411b0397
commit 1813804e36
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 15 additions and 1 deletion

View File

@ -141,6 +141,9 @@ class XGBoostClassifier (
/** Stores `value` in the `customEval` param and returns this instance so calls can be chained. */
def setCustomEval(value: EvalTrait): this.type = set(customEval, value)
/**
* Stores `value` in the `singlePrecisionHistogram` param.
*
* @param value the flag to store in the param
* @return this instance, so setter calls can be chained
*/
def setSinglePrecisionHistogram(value: Boolean): this.type = {
  set(singlePrecisionHistogram, value)
}
// called at the start of fit/train when 'eval_metric' is not defined
private def setupDefaultEvalMetric(): String = {
require(isDefined(objective), "Users must set \'objective\' via xgboostParams.")

View File

@ -145,6 +145,9 @@ class XGBoostRegressor (
/** Stores `value` in the `customEval` param and returns this instance so calls can be chained. */
def setCustomEval(value: EvalTrait): this.type = set(customEval, value)
/**
* Stores `value` in the `singlePrecisionHistogram` param.
*
* @param value the flag to store in the param
* @return this instance, so setter calls can be chained
*/
def setSinglePrecisionHistogram(value: Boolean): this.type = {
  set(singlePrecisionHistogram, value)
}
// called at the start of fit/train when 'eval_metric' is not defined
private def setupDefaultEvalMetric(): String = {
require(isDefined(objective), "Users must set \'objective\' via xgboostParams.")

View File

@ -18,7 +18,7 @@ package ml.dmlc.xgboost4j.scala.spark.params
import scala.collection.immutable.HashSet
import org.apache.spark.ml.param.{DoubleParam, IntParam, Param, Params}
import org.apache.spark.ml.param.{DoubleParam, IntParam, BooleanParam, Param, Params}
private[spark] trait BoosterParams extends Params {
@ -173,6 +173,14 @@ private[spark] trait BoosterParams extends Params {
/** Returns the value currently resolved for the `maxBins` param. */
final def getMaxBins: Int = $(maxBins)
/**
* Whether to build histograms using single precision floating point values.
*
* Registered on this `Params` instance under the name "singlePrecisionHistogram".
*
* NOTE(review): no default value for this param is visible in this hunk; confirm that one is
* registered elsewhere (e.g. via `setDefault`), since reading an unset param without a default
* is expected to fail.
*/
final val singlePrecisionHistogram = new BooleanParam(this, "singlePrecisionHistogram",
"whether to use single precision to build histograms")
/**
* Returns the value currently resolved for the `singlePrecisionHistogram` param.
*
* NOTE(review): presumably fails when the param is neither set nor given a default; verify
* that a default is registered before relying on this getter in unset state.
*/
final def getSinglePrecisionHistogram: Boolean = $(singlePrecisionHistogram)
/**
* This is only used for approximate greedy algorithm.
* This roughly translated into O(1 / sketch_eps) number of bins. Compared to directly select