[jvm-packages] use ML's param system to build the passed-in params to XGBoost (#2043)
* add back train method but mark as deprecated
* fix scalastyle error
* use ML's param system to build the passed-in params to XGBoost
* clean
This commit is contained in:
@@ -32,7 +32,7 @@ import org.apache.spark.sql.{Dataset, Row}
|
||||
* XGBoost Estimator to produce a XGBoost model
|
||||
*/
|
||||
class XGBoostEstimator private[spark](
|
||||
override val uid: String, private[spark] var xgboostParams: Map[String, Any])
|
||||
override val uid: String, xgboostParams: Map[String, Any])
|
||||
extends Predictor[MLVector, XGBoostEstimator, XGBoostModel]
|
||||
with LearningTaskParams with GeneralParams with BoosterParams {
|
||||
|
||||
@@ -41,7 +41,6 @@ class XGBoostEstimator private[spark](
|
||||
|
||||
def this(uid: String) = this(uid, Map[String, Any]())
|
||||
|
||||
|
||||
// called in fromXGBParamMapToParams only when eval_metric is not defined
|
||||
private def setupDefaultEvalMetric(): String = {
|
||||
val objFunc = xgboostParams.getOrElse("objective", xgboostParams.getOrElse("obj_type", null))
|
||||
@@ -93,16 +92,11 @@ class XGBoostEstimator private[spark](
|
||||
|
||||
fromXGBParamMapToParams()
|
||||
|
||||
// only called when XGBParamMap is empty, i.e. in the constructor this(String)
|
||||
// TODO: refactor to be functional
|
||||
private def fromParamsToXGBParamMap(): Map[String, Any] = {
|
||||
require(xgboostParams.isEmpty, "fromParamsToXGBParamMap can only be called when" +
|
||||
" XGBParamMap is empty, i.e. in the constructor this(String)")
|
||||
private[spark] def fromParamsToXGBParamMap: Map[String, Any] = {
|
||||
val xgbParamMap = new mutable.HashMap[String, Any]()
|
||||
for (param <- params) {
|
||||
xgbParamMap += param.name -> $(param)
|
||||
}
|
||||
xgboostParams = xgbParamMap.toMap
|
||||
xgbParamMap.toMap
|
||||
}
|
||||
|
||||
@@ -116,8 +110,9 @@ class XGBoostEstimator private[spark](
|
||||
LabeledPoint(label, feature)
|
||||
}
|
||||
transformSchema(trainingSet.schema, logging = true)
|
||||
val trainedModel = XGBoost.trainWithRDD(instances, xgboostParams, $(round), $(nWorkers),
|
||||
$(customObj), $(customEval), $(useExternalMemory), $(missing)).setParent(this)
|
||||
val trainedModel = XGBoost.trainWithRDD(instances, fromParamsToXGBParamMap,
|
||||
$(round), $(nWorkers), $(customObj), $(customEval), $(useExternalMemory),
|
||||
$(missing)).setParent(this)
|
||||
val returnedModel = copyValues(trainedModel)
|
||||
if (XGBoost.isClassificationTask(xgboostParams)) {
|
||||
val numClass = {
|
||||
@@ -133,11 +128,6 @@ class XGBoostEstimator private[spark](
|
||||
}
|
||||
|
||||
override def copy(extra: ParamMap): XGBoostEstimator = {
|
||||
val est = defaultCopy(extra).asInstanceOf[XGBoostEstimator]
|
||||
// we need to synchronize the params here instead of in the constructor
|
||||
// because we cannot guarantee that params (default implementation) is initialized fully
|
||||
// before the other params
|
||||
est.fromParamsToXGBParamMap()
|
||||
est
|
||||
defaultCopy(extra).asInstanceOf[XGBoostEstimator]
|
||||
}
|
||||
}
|
||||
|
||||
@@ -196,7 +196,7 @@ trait BoosterParams extends Params {
|
||||
minChildWeight -> 1, maxDeltaStep -> 0,
|
||||
subSample -> 1, colSampleByTree -> 1, colSampleByLevel -> 1,
|
||||
lambda -> 1, alpha -> 0, treeMethod -> "auto", sketchEps -> 0.03,
|
||||
scalePosWeight -> 1, sampleType -> "uniform", normalizeType -> "tree",
|
||||
scalePosWeight -> 1.0, sampleType -> "uniform", normalizeType -> "tree",
|
||||
rateDrop -> 0.0, skipDrop -> 0.0, lambdaBias -> 0)
|
||||
|
||||
/**
|
||||
|
||||
Reference in New Issue
Block a user