impose shuffle when creating training RDD (#1531)

This commit is contained in:
Nan Zhu 2016-08-31 07:34:10 -04:00 committed by GitHub
parent 3f198b9fef
commit 7fb3fbf577

View File

@@ -70,12 +70,9 @@ object XGBoost extends Serializable {
useExternalMemory: Boolean, missing: Float = Float.NaN): RDD[Booster] = { useExternalMemory: Boolean, missing: Float = Float.NaN): RDD[Booster] = {
import DataUtils._ import DataUtils._
val partitionedData = { val partitionedData = {
if (numWorkers > trainingData.partitions.length) { if (numWorkers != trainingData.partitions.length) {
logger.info(s"repartitioning training set to $numWorkers partitions") logger.info(s"repartitioning training set to $numWorkers partitions")
trainingData.repartition(numWorkers) trainingData.repartition(numWorkers)
} else if (numWorkers < trainingData.partitions.length) {
logger.info(s"repartitioning training set to $numWorkers partitions")
trainingData.coalesce(numWorkers)
} else { } else {
trainingData trainingData
} }