-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-19182][DStream] Optimize the lock in StreamingJobProgressListener to not block UI when generating Streaming jobs #16601
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
- Loading branch information
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -31,12 +31,15 @@ final private[streaming] class DStreamGraph extends Serializable with Logging { | |
| private val inputStreams = new ArrayBuffer[InputDStream[_]]() | ||
| private val outputStreams = new ArrayBuffer[DStream[_]]() | ||
|
|
||
| val inputStreamNameAndID = new ArrayBuffer[(String, Int)]() | ||
|
|
||
| var rememberDuration: Duration = null | ||
| var checkpointInProgress = false | ||
|
|
||
| var zeroTime: Time = null | ||
| var startTime: Time = null | ||
| var batchDuration: Duration = null | ||
| var numReceivers: Int = 0 | ||
|
||
|
|
||
| def start(time: Time) { | ||
| this.synchronized { | ||
|
|
@@ -45,7 +48,9 @@ final private[streaming] class DStreamGraph extends Serializable with Logging { | |
| startTime = time | ||
| outputStreams.foreach(_.initialize(zeroTime)) | ||
| outputStreams.foreach(_.remember(rememberDuration)) | ||
| outputStreams.foreach(_.validateAtStart) | ||
| outputStreams.foreach(_.validateAtStart()) | ||
| numReceivers = inputStreams.count(_.isInstanceOf[ReceiverInputDStream[_]]) | ||
| inputStreams.foreach(is => inputStreamNameAndID.+=((is.name, is.id))) | ||
| inputStreams.par.foreach(_.start()) | ||
| } | ||
| } | ||
|
|
@@ -106,16 +111,18 @@ final private[streaming] class DStreamGraph extends Serializable with Logging { | |
| .toArray | ||
| } | ||
|
|
||
| def getInputStreamName(streamId: Int): Option[String] = synchronized { | ||
| inputStreams.find(_.id == streamId).map(_.name) | ||
| } | ||
| def getReceiverNumber: Int = numReceivers | ||
|
||
|
|
||
| def getInputStreamNameAndID: ArrayBuffer[(String, Int)] = inputStreamNameAndID | ||
|
|
||
| def generateJobs(time: Time): Seq[Job] = { | ||
| logDebug("Generating jobs for time " + time) | ||
| val jobs = getOutputStreams().flatMap { outputStream => | ||
| val jobOption = outputStream.generateJob(time) | ||
| jobOption.foreach(_.setCallSite(outputStream.creationSite)) | ||
| jobOption | ||
| val jobs = this.synchronized { | ||
| outputStreams.flatMap { outputStream => | ||
| val jobOption = outputStream.generateJob(time) | ||
| jobOption.foreach(_.setCallSite(outputStream.creationSite)) | ||
| jobOption | ||
| } | ||
| } | ||
| logDebug("Generated " + jobs.length + " jobs for time " + time) | ||
| jobs | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -169,7 +169,7 @@ private[spark] class StreamingJobProgressListener(ssc: StreamingContext) | |
| } | ||
|
|
||
| def numInactiveReceivers: Int = { | ||
| ssc.graph.getReceiverInputStreams().length - numActiveReceivers | ||
| ssc.graph.getReceiverNumber - numActiveReceivers | ||
| } | ||
|
|
||
| def numTotalCompletedBatches: Long = synchronized { | ||
|
|
@@ -197,17 +197,17 @@ private[spark] class StreamingJobProgressListener(ssc: StreamingContext) | |
| } | ||
|
|
||
| def retainedCompletedBatches: Seq[BatchUIData] = synchronized { | ||
| completedBatchUIData.toSeq | ||
| completedBatchUIData | ||
|
||
| } | ||
|
|
||
| def streamName(streamId: Int): Option[String] = { | ||
| ssc.graph.getInputStreamName(streamId) | ||
| ssc.graph.getInputStreamNameAndID.find(_._2 == streamId).map(_._1) | ||
| } | ||
|
|
||
| /** | ||
| * Return all InputDStream Ids | ||
| */ | ||
| def streamIds: Seq[Int] = ssc.graph.getInputStreams().map(_.id) | ||
| def streamIds: Seq[Int] = ssc.graph.getInputStreamNameAndID.map(_._2) | ||
|
|
||
| /** | ||
| * Return all of the record rates for each InputDStream in each batch. The key of the return value | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: change it to
`@volatile private var inputStreamNameAndID: Seq[(String, Int)] = Nil` and just set it in `start`. Don't expose a mutable `ArrayBuffer` to the caller.