impose shuffle when creating training RDD (#1531)

This commit is contained in:
Nan Zhu 2016-08-31 07:34:10 -04:00 committed by GitHub
parent 3f198b9fef
commit 7fb3fbf577

View File

@@ -70,12 +70,9 @@ object XGBoost extends Serializable {
useExternalMemory: Boolean, missing: Float = Float.NaN): RDD[Booster] = {
import DataUtils._
val partitionedData = {
-if (numWorkers > trainingData.partitions.length) {
+if (numWorkers != trainingData.partitions.length) {
 logger.info(s"repartitioning training set to $numWorkers partitions")
 trainingData.repartition(numWorkers)
-} else if (numWorkers < trainingData.partitions.length) {
-logger.info(s"repartitioning training set to $numWorkers partitions")
-trainingData.coalesce(numWorkers)
} else {
trainingData
}