force the user to set number of workers
This commit is contained in:
@@ -45,8 +45,10 @@ object XGBoost extends Serializable {
|
||||
import DataUtils._
|
||||
val partitionedData = {
|
||||
if (numWorkers > trainingData.partitions.length) {
|
||||
logger.info(s"repartitioning training set to $numWorkers partitions")
|
||||
trainingData.repartition(numWorkers)
|
||||
} else if (numWorkers < trainingData.partitions.length) {
|
||||
logger.info(s"repartitioning training set to $numWorkers partitions")
|
||||
trainingData.coalesce(numWorkers)
|
||||
} else {
|
||||
trainingData
|
||||
@@ -79,7 +81,9 @@ object XGBoost extends Serializable {
|
||||
*/
|
||||
@throws(classOf[XGBoostError])
|
||||
def train(trainingData: RDD[LabeledPoint], configMap: Map[String, Any], round: Int,
|
||||
nWorkers: Int = 0, obj: ObjectiveTrait = null, eval: EvalTrait = null): XGBoostModel = {
|
||||
nWorkers: Int, obj: ObjectiveTrait = null, eval: EvalTrait = null): XGBoostModel = {
|
||||
require(nWorkers > 0, "you must specify more than 0 workers")
|
||||
val tracker = new RabitTracker(nWorkers)
|
||||
implicit val sc = trainingData.sparkContext
|
||||
var overridedConfMap = configMap
|
||||
if (overridedConfMap.contains("nthread")) {
|
||||
@@ -91,17 +95,9 @@ object XGBoost extends Serializable {
|
||||
} else {
|
||||
overridedConfMap = configMap + ("nthread" -> sc.getConf.get("spark.task.cpus", "1").toInt)
|
||||
}
|
||||
val numWorkers = {
|
||||
if (nWorkers > 0) {
|
||||
nWorkers
|
||||
} else {
|
||||
trainingData.partitions.length
|
||||
}
|
||||
}
|
||||
val tracker = new RabitTracker(numWorkers)
|
||||
require(tracker.start(), "FAULT: Failed to start tracker")
|
||||
val boosters = buildDistributedBoosters(trainingData, overridedConfMap,
|
||||
tracker.getWorkerEnvs.asScala, numWorkers, round, obj, eval)
|
||||
tracker.getWorkerEnvs.asScala, nWorkers, round, obj, eval)
|
||||
val sparkJobThread = new Thread() {
|
||||
override def run() {
|
||||
// force the job
|
||||
|
||||
@@ -148,7 +148,7 @@ class XGBoostSuite extends FunSuite with BeforeAndAfter {
|
||||
val tempFile = Files.createTempFile(tempDir, "", "")
|
||||
val paramMap = List("eta" -> "1", "max_depth" -> "2", "silent" -> "0",
|
||||
"objective" -> "binary:logistic").toMap
|
||||
val xgBoostModel = XGBoost.train(trainingRDD, paramMap, 5)
|
||||
val xgBoostModel = XGBoost.train(trainingRDD, paramMap, 5, numWorkers)
|
||||
assert(eval.eval(xgBoostModel.predict(testSetDMatrix), testSetDMatrix) < 0.1)
|
||||
xgBoostModel.saveModelAsHadoopFile(tempFile.toFile.getAbsolutePath)
|
||||
val loadedXGBooostModel = XGBoost.loadModelFromHadoopFile(tempFile.toFile.getAbsolutePath)
|
||||
@@ -167,7 +167,7 @@ class XGBoostSuite extends FunSuite with BeforeAndAfter {
|
||||
val paramMap = List("eta" -> "1", "max_depth" -> "2", "silent" -> "0",
|
||||
"objective" -> "binary:logistic", "nthread" -> 6).toMap
|
||||
intercept[IllegalArgumentException] {
|
||||
XGBoost.train(trainingRDD, paramMap, 5)
|
||||
XGBoost.train(trainingRDD, paramMap, 5, numWorkers)
|
||||
}
|
||||
customSparkContext.stop()
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user