Skip to content
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Address the comments
  • Loading branch information
jerryshao committed Sep 18, 2016
commit be1abfa0e902fca3ed945bfbb6e0573909d55e2b
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ abstract class CompactibleFileStreamLog[T: ClassTag](
protected def deserializeData(encodedString: String): T

/**
* Filter out the obsolote logs.
* Filter out the obsolete logs.
*/
def compactLogs(logs: Seq[T]): Seq[T]

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@

package org.apache.spark.sql.execution.streaming

import java.util.{LinkedHashMap => JLinkedHashMap}
import java.util.Map.Entry

import scala.collection.mutable

import org.json4s.NoTypeHints
Expand Down Expand Up @@ -49,17 +52,13 @@ class FileStreamSourceLog(

private implicit val formats = Serialization.formats(NoTypeHints)

// A fixed size log cache to cache the file entries belong to the compaction batch. It is used
// to avoid scanning the compacted log file to retrieve it's own batch data.
// A fixed size log entry cache to cache the file entries belong to the compaction batch. It is
// used to avoid scanning the compacted log file to retrieve it's own batch data.
private val cacheSize = compactInterval
private val fileEntryCache = new mutable.LinkedHashMap[Long, Array[FileEntry]]

private def updateCache(batchId: Long, logs: Array[FileEntry]): Unit = {
if (fileEntryCache.size >= cacheSize) {
fileEntryCache.drop(1)
private val fileEntryCache = new JLinkedHashMap[Long, Array[FileEntry]] {
override def removeEldestEntry(eldest: Entry[Long, Array[FileEntry]]): Boolean = {
size() > cacheSize
}

fileEntryCache.put(batchId, logs)
}

protected override def serializeData(data: FileEntry): String = {
Expand All @@ -76,7 +75,7 @@ class FileStreamSourceLog(

override def add(batchId: Long, logs: Array[FileEntry]): Boolean = {
if (super.add(batchId, logs) && isCompactionBatch(batchId, compactInterval)) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is wrong. If super.add(batchId, logs) is false, then we should always return false.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes, you're right, I will fix it.

updateCache(batchId, logs)
fileEntryCache.put(batchId, logs)
true
} else if (!isCompactionBatch(batchId, compactInterval)) {
true
Expand All @@ -90,8 +89,8 @@ class FileStreamSourceLog(
val endBatchId = getLatest().map(_._1).getOrElse(0L)

val (existedBatches, removedBatches) = (startBatchId to endBatchId).map { id =>
if (isCompactionBatch(id, compactInterval) && fileEntryCache.contains(id)) {
(id, Some(fileEntryCache(id)))
if (isCompactionBatch(id, compactInterval) && fileEntryCache.containsKey(id)) {
(id, Some(fileEntryCache.get(id)))
} else {
val logs = super.get(id).map(_.filter(_.batchId == id))
(id, logs)
Expand Down