[jvm-packages] separate classification and regression models and integrate with the ML package (#1608)

This commit is contained in:
Nan Zhu
2016-09-30 11:49:03 -04:00
committed by GitHub
parent 3b9987ca9c
commit 1673bcbe7e
16 changed files with 771 additions and 381 deletions

View File

@@ -20,6 +20,8 @@ import ml.dmlc.xgboost4j.scala.{Booster, DMatrix}
import ml.dmlc.xgboost4j.scala.spark.{DataUtils, XGBoost}
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.mllib.util.MLUtils
import org.apache.spark.ml.linalg.{DenseVector => MLDenseVector}
import org.apache.spark.ml.feature.{LabeledPoint => MLLabeledPoint}
object SparkWithRDD {
def main(args: Array[String]): Unit = {
@@ -38,8 +40,10 @@ object SparkWithRDD {
// number of iterations
val numRound = args(0).toInt
import DataUtils._
val trainRDD = MLUtils.loadLibSVMFile(sc, inputTrainPath)
val testSet = MLUtils.loadLibSVMFile(sc, inputTestPath).collect().iterator
val trainRDD = MLUtils.loadLibSVMFile(sc, inputTrainPath).map(lp =>
MLLabeledPoint(lp.label, new MLDenseVector(lp.features.toArray)))
val testSet = MLUtils.loadLibSVMFile(sc, inputTestPath).collect().map(
lp => new MLDenseVector(lp.features.toArray)).iterator
// training parameters
val paramMap = List(
"eta" -> 0.1f,