force the user to set number of workers

This commit is contained in:
CodingCat
2016-03-12 13:33:57 -05:00
parent 980898f3fb
commit 16b9e92328
6 changed files with 20 additions and 26 deletions

View File

@@ -45,8 +45,10 @@ object XGBoost extends Serializable {
import DataUtils._
val partitionedData = {
if (numWorkers > trainingData.partitions.length) {
logger.info(s"repartitioning training set to $numWorkers partitions")
trainingData.repartition(numWorkers)
} else if (numWorkers < trainingData.partitions.length) {
logger.info(s"repartitioning training set to $numWorkers partitions")
trainingData.coalesce(numWorkers)
} else {
trainingData
@@ -79,7 +81,9 @@ object XGBoost extends Serializable {
*/
@throws(classOf[XGBoostError])
def train(trainingData: RDD[LabeledPoint], configMap: Map[String, Any], round: Int,
nWorkers: Int = 0, obj: ObjectiveTrait = null, eval: EvalTrait = null): XGBoostModel = {
nWorkers: Int, obj: ObjectiveTrait = null, eval: EvalTrait = null): XGBoostModel = {
require(nWorkers > 0, "you must specify more than 0 workers")
val tracker = new RabitTracker(nWorkers)
implicit val sc = trainingData.sparkContext
var overridedConfMap = configMap
if (overridedConfMap.contains("nthread")) {
@@ -91,17 +95,9 @@ object XGBoost extends Serializable {
} else {
overridedConfMap = configMap + ("nthread" -> sc.getConf.get("spark.task.cpus", "1").toInt)
}
val numWorkers = {
if (nWorkers > 0) {
nWorkers
} else {
trainingData.partitions.length
}
}
val tracker = new RabitTracker(numWorkers)
require(tracker.start(), "FAULT: Failed to start tracker")
val boosters = buildDistributedBoosters(trainingData, overridedConfMap,
tracker.getWorkerEnvs.asScala, numWorkers, round, obj, eval)
tracker.getWorkerEnvs.asScala, nWorkers, round, obj, eval)
val sparkJobThread = new Thread() {
override def run() {
// force the job

View File

@@ -148,7 +148,7 @@ class XGBoostSuite extends FunSuite with BeforeAndAfter {
val tempFile = Files.createTempFile(tempDir, "", "")
val paramMap = List("eta" -> "1", "max_depth" -> "2", "silent" -> "0",
"objective" -> "binary:logistic").toMap
val xgBoostModel = XGBoost.train(trainingRDD, paramMap, 5)
val xgBoostModel = XGBoost.train(trainingRDD, paramMap, 5, numWorkers)
assert(eval.eval(xgBoostModel.predict(testSetDMatrix), testSetDMatrix) < 0.1)
xgBoostModel.saveModelAsHadoopFile(tempFile.toFile.getAbsolutePath)
val loadedXGBooostModel = XGBoost.loadModelFromHadoopFile(tempFile.toFile.getAbsolutePath)
@@ -167,7 +167,7 @@ class XGBoostSuite extends FunSuite with BeforeAndAfter {
val paramMap = List("eta" -> "1", "max_depth" -> "2", "silent" -> "0",
"objective" -> "binary:logistic", "nthread" -> 6).toMap
intercept[IllegalArgumentException] {
XGBoost.train(trainingRDD, paramMap, 5)
XGBoost.train(trainingRDD, paramMap, 5, numWorkers)
}
customSparkContext.stop()
}