@@ -45,7 +45,7 @@ object GoldilocksWithHashMap {
4545 val sortedAggregatedValueColumnPairs = aggregatedValueColumnPairs.sortByKey()
4646 sortedAggregatedValueColumnPairs.persist(StorageLevel .MEMORY_AND_DISK )
4747
48- val numOfColumns = dataFrame.schema.length
48+ val numOfColumns = dataFrame.schema.length
4949 val partitionColumnsFreq =
5050 getColumnsFreqPerPartition(sortedAggregatedValueColumnPairs, numOfColumns)
5151 val ranksLocations =
@@ -79,7 +79,7 @@ object GoldilocksWithHashMap {
7979 def getAggregatedValueColumnPairs (dataFrame : DataFrame ):
8080 RDD [((Double , Int ), Long )] = {
8181
82- val aggregatedValueColumnRDD = dataFrame.rdd.mapPartitions(rows => {
82+ val aggregatedValueColumnRDD = dataFrame.rdd.mapPartitions(rows => {
8383 val valueColumnMap = new mutable.HashMap [(Double , Int ), Long ]()
8484 rows.foreach(row => {
8585 row.toSeq.zipWithIndex.foreach{ case (value, columnIndex) =>
@@ -312,7 +312,7 @@ object FindTargetsSubRoutine extends Serializable {
312312 // A HashMap with the running totals of each column index. As we loop through
313313 // the iterator, we will update the hashmap as we see elements of each
314314 // column index.
315- val runningTotals : mutable.HashMap [Int , Long ]= new mutable.HashMap ()
315+ val runningTotals : mutable.HashMap [Int , Long ]= new mutable.HashMap ()
316316 runningTotals ++= columnsInThisPart.map(columnIndex => (columnIndex, 0L )).toMap
317317
318318 // we use an array buffer to build the resulting iterator
@@ -327,7 +327,7 @@ object FindTargetsSubRoutine extends Serializable {
327327 val total = runningTotals(colIndex)
328328 // the ranks that are contained in this element of the input iterator,
329329 // found by filtering this column's target indices to those in (total, total + count]
330- val ranksPresent = columnsRelativeIndex(colIndex)
330+ val ranksPresent = columnsRelativeIndex(colIndex)
331331 .filter(index => (index <= count + total) && (index > total))
332332 ranksPresent.foreach(r => result += ((colIndex, value)))
333333 // update the running totals.
@@ -352,11 +352,11 @@ object FindTargetsSubRoutine extends Serializable {
352352 val columnsRelativeIndex = targetsInThisPart.groupBy(_._1).mapValues(_.map(_._2))
353353 val columnsInThisPart = targetsInThisPart.map(_._1).distinct
354354
355- val runningTotals : mutable.HashMap [Int , Long ]= new mutable.HashMap ()
355+ val runningTotals : mutable.HashMap [Int , Long ]= new mutable.HashMap ()
356356 runningTotals ++= columnsInThisPart.map(columnIndex => (columnIndex, 0L )).toMap
357357
358358 // filter out the pairs that don't have a column index that is in this part
359- val pairsWithRanksInThisPart = valueColumnPairsIter.filter{
359+ val pairsWithRanksInThisPart = valueColumnPairsIter.filter{
360360 case (((value, colIndex), count)) =>
361361 columnsInThisPart contains colIndex
362362 }
0 commit comments