Changes from commit 573b657bca5a77297cafbde489ba380b3372c81c ("progress"), committed by brkyvz on Oct 29, 2015.
@@ -88,8 +88,10 @@ class JobScheduler(val ssc: StreamingContext) extends Logging {
     if (eventLoop == null) return // scheduler has already been stopped
     logDebug("Stopping JobScheduler")
 
-    // First, stop receiving
-    receiverTracker.stop(processAllReceivedData)
+    if (receiverTracker != null) {
+      // First, stop receiving
+      receiverTracker.stop(processAllReceivedData)
brkyvz (Contributor, author) commented:
NPE thrown when streaming context is stopped before recovery is complete
+    }
 
     // Second, stop generating jobs. If it has to process all received data,
     // then this will wait for all the processing through JobScheduler to be over.
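A hedged illustration of the scenario described in the comment above; the names checkpointDir and createContext are placeholders and the exact call sequence is an assumption, not something taken from the PR.

import org.apache.spark.streaming.StreamingContext

// Driver recovering from a checkpoint: replaying a large write-ahead log can take
// minutes. If the context is stopped before that recovery finishes, JobScheduler.stop()
// previously dereferenced a still-null receiverTracker; the added null check above makes
// that stop a no-op instead of throwing a NullPointerException.
val ssc = StreamingContext.getOrCreate(checkpointDir, createContext _)
ssc.start()
// ... a stop issued while recovery is still in progress (e.g. from a shutdown hook):
ssc.stop(stopSparkContext = true, stopGracefully = false)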
@@ -126,11 +126,11 @@ private[streaming] class FileBasedWriteAheadLog(
     val logFilesToRead = pastLogs.map{ _.path} ++ currentLogPath
     logInfo("Reading from the logs: " + logFilesToRead.mkString("\n"))
 
-    logFilesToRead.iterator.map { file =>
+    logFilesToRead.par.map { file =>
harishreedharan (Contributor) commented:
This is an expensive operation - you'd end up running an O(n) operation to create a copy (in addition to the copy cost). Do we really need this? I am not entirely sure the copying adds a whole lot of value, considering that this array is not going to be very huge. Also note the additional cost to spin up threads (if the context does not already have them spun up).

brkyvz (Contributor, author) replied:
@harishreedharan I did several benchmarks with this. In a setting where you would have a 30 minute window operation, which corresponds to ~9000 files on S3 with closeFileAfterWrite = true, the current code takes about 15 minutes to recover. With this very simple addition, this time is reduced to 1.5 minutes (on a driver with 4 CPUs, probably with hyper-threading).

Contributor commented:
Which threadpool / execution context is this par operation going to? You must not use the default system execution context.

       logDebug(s"Creating log reader with $file")
       val reader = new FileBasedWriteAheadLogReader(file, hadoopConf)
       CompletionIterator[ByteBuffer, Iterator[ByteBuffer]](reader, reader.close _)
-    }.flatten.asJava
+    }.flatten.toIterator.asJava
   }
 
   /**
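Regarding the reviewer's question above about which execution context .par uses: by default, Scala parallel collections run on a shared global fork-join pool. A hedged sketch of how a dedicated, bounded pool could be supplied instead, assuming a Scala 2.11-era API; the pool size of 4 and all names are illustrative, and this is not necessarily what the PR ends up doing.

import scala.collection.parallel.ForkJoinTaskSupport
import scala.concurrent.forkjoin.ForkJoinPool  // java.util.concurrent.ForkJoinPool on newer Scala versions

object ParWithOwnPool {
  def main(args: Array[String]): Unit = {
    val files = (1 to 16).map(i => s"log-$i")  // stand-ins for WAL file paths

    val parFiles = files.par
    // Route the parallel operations to a dedicated 4-thread pool rather than the
    // shared global pool, addressing the "which execution context" concern.
    parFiles.tasksupport = new ForkJoinTaskSupport(new ForkJoinPool(4))

    val lengths = parFiles.map { f =>
      // per-file work would go here (e.g. reading one WAL segment)
      f.length
    }
    println(lengths.sum)
  }
}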
@@ -16,7 +16,7 @@
  */
 package org.apache.spark.streaming.util
 
-import java.io.{Closeable, EOFException}
+import java.io.{IOException, Closeable, EOFException}
 import java.nio.ByteBuffer
 
 import org.apache.hadoop.conf.Configuration
@@ -55,6 +55,18 @@ private[streaming] class FileBasedWriteAheadLogReader(path: String, conf: Config
logDebug("Error reading next item, EOF reached", e)
close()
false
case e: IOException =>
logWarning("Error while trying to read data. If the file was deleted, " +
"this should be okay.", e)
if (HdfsUtils.checkFileExists(path, conf)) {
throw e
} else {
// file was deleted. This can occur when the daemon cleanup thread takes time to
Contributor commented:
super nit: file -> File; since this comment actually has multiple sentences, it's weird to have the first one start with lower case.

+            // delete the file during recovery.
+            close()
+            false
+          }
+
         case e: Exception =>
           logWarning("Error while trying to read data from HDFS.", e)
           close()
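A hedged sketch of the behavior this change gives the reader; segmentPath and hadoopConf are placeholder values, and since the class is private[streaming] such code would have to live inside the org.apache.spark.streaming package tree. With the new branch, a file that was already cleaned up makes hasNext log a warning and return false instead of propagating the IOException.

import java.nio.ByteBuffer
import org.apache.hadoop.conf.Configuration
import org.apache.spark.streaming.util.FileBasedWriteAheadLogReader

val hadoopConf = new Configuration()
val segmentPath = "hdfs:///tmp/streaming/wal/log-42"  // placeholder path
val reader = new FileBasedWriteAheadLogReader(segmentPath, hadoopConf)
while (reader.hasNext) {  // returns false if the file was deleted mid-recovery
  val record: ByteBuffer = reader.next()
  // replay `record` ...
}
reader.close()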
@@ -71,4 +71,9 @@ private[streaming] object HdfsUtils {
       case _ => fs
     }
   }
+
+  def checkFileExists(path: Path, conf: Configuration): Boolean = {
+    val fs = getFileSystemForPath(path, conf)
+    fs.isFile(path)
+  }
 }