Skip to content
Closed
Next Next commit
[SPARK-16575] [spark core] partition calculation mismatch with sc.bin…
…aryFiles
  • Loading branch information
fidato13 committed Oct 2, 2016
commit 823aba185c562d08474c1aca846bf8de467beee4
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,9 @@ private[spark] abstract class StreamFileInputFormat[T]
* which is set through setMaxSplitSize
*/
def setMinPartitions(context: JobContext, minPartitions: Int) {
val totalLen = listStatus(context).asScala.filterNot(_.isDirectory).map(_.getLen).sum
val maxSplitSize = math.ceil(totalLen / math.max(minPartitions, 1.0)).toLong
val files = listStatus(context).asScala
val totalLen = files.filterNot(_.isDirectory).map(_.getLen).sum
val maxSplitSize = math.ceil(totalLen / math.max(minPartitions, files.size)).toLong
super.setMaxSplitSize(maxSplitSize)
}

Expand Down