[SPARK-9104][SPARK-9105][SPARK-9106][SPARK-9107][CORE] Netty network layer memory usage on webUI #7753
Changes from 1 commit
@@ -32,7 +32,7 @@ import org.json4s.jackson.JsonMethods._

 import org.apache.spark.{Logging, SparkConf, SPARK_VERSION}
 import org.apache.spark.deploy.SparkHadoopUtil
-import org.apache.spark.executor.TransportMetrics
+import org.apache.spark.executor.{ExecutorMetrics, TransportMetrics}
 import org.apache.spark.io.CompressionCodec
 import org.apache.spark.util.{JsonProtocol, Utils}
@@ -169,10 +169,9 @@ private[spark] class EventLoggingListener(
  // in {{executorIdToLatestMetrics}}.
  private def updateAndLogExecutorMemoryMetrics(): Unit = {
    executorIdToModifiedMaxMetrics.foreach { case (_, metrics) => logEvent(metrics) }
    // Clear the modified metrics map after each log action
    executorIdToModifiedMaxMetrics.clear()
    executorIdToLatestMetrics.foreach { case (_, metrics) => logEvent(metrics) }
    executorIdToLatestMetrics.foreach { case (executorId, metrics) =>
      executorIdToModifiedMaxMetrics.update(executorId, metrics)
    }
  }

  // Events that do not trigger a flush
@@ -234,8 +233,12 @@ private[spark] class EventLoggingListener(

   // No-op because logging every update would be overkill
   override def onExecutorMetricsUpdate(event: SparkListenerExecutorMetricsUpdate): Unit = {
-    executorIdToLatestMetrics.update(event.execId, event)
-    updateModifiedMetrics(event.execId)
+    // In order to avoid the logged event consumes too much storage size, taskMetrics would not
+    // be logged into event log file currently
+    val lightEvent = SparkListenerExecutorMetricsUpdate(
Contributor: The comment above the method is inaccurate (this is no longer a no-op, obviously). Can you change it to something like "Track executor metrics for logging on stage start and end"? I'd also update the inner comment to something like "We only track the executor metrics in each stage, so we drop the task metrics as they are quite verbose", and maybe rename "lightEvent" to "eventWithoutTaskMetrics"?
+      event.execId, event.executorMetrics, Seq.empty)
+    executorIdToLatestMetrics.update(lightEvent.execId, lightEvent)
Contributor: nit: more idiomatic Scala to write …
+    updateModifiedMetrics(lightEvent.execId)
   }
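As a concrete illustration of the suggestions above, the method might read as follows after the rename and comment updates. This is only a sketch using this patch's names (executorIdToLatestMetrics, updateModifiedMetrics), not the committed code; the truncated "more idiomatic" nit presumably concerns the map-update syntax, since m(k) = v is the idiomatic shorthand for m.update(k, v):

  // Track executor metrics for logging on stage start and end.
  override def onExecutorMetricsUpdate(event: SparkListenerExecutorMetricsUpdate): Unit = {
    // We only track the executor metrics in each stage, so we drop the task
    // metrics as they are quite verbose.
    val eventWithoutTaskMetrics = SparkListenerExecutorMetricsUpdate(
      event.execId, event.executorMetrics, Seq.empty)
    // m(k) = v is the idiomatic form of m.update(k, v)
    executorIdToLatestMetrics(eventWithoutTaskMetrics.execId) = eventWithoutTaskMetrics
    updateModifiedMetrics(eventWithoutTaskMetrics.execId)
  }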
@@ -288,8 +291,15 @@ private[spark] class EventLoggingListener(
      } else {
        toBeModTransMetrics.offHeapSize
      }
-     toBeModifiedEvent.executorMetrics.setTransportMetrics(
-       TransportMetrics(timeStamp, onHeapSize, offHeapSize))
+     // We should maintain a new instance for each update to avoid side-effect
+     val modifiedExecMetrics = new ExecutorMetrics()
+     modifiedExecMetrics.setHostname(toBeModifiedEvent.executorMetrics.hostname)
+     modifiedExecMetrics.setTransportMetrics(TransportMetrics(
+       timeStamp, onHeapSize, offHeapSize))
+     val modifiedEvent = SparkListenerExecutorMetricsUpdate(
+       toBeModifiedEvent.execId, modifiedExecMetrics, toBeModifiedEvent.taskMetrics)
+     executorIdToModifiedMaxMetrics.update(executorId, modifiedEvent)
    }
  }
}
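The "avoid side-effect" comment is the point of this change: ExecutorMetrics is mutable, so mutating the instance held by a previously stored event would corrupt other maps that still reference the same object. A self-contained sketch of the aliasing hazard, using simplified stand-in types rather than Spark's actual classes:

  import scala.collection.mutable

  // Simplified stand-ins for the mutable ExecutorMetrics/TransportMetrics;
  // these model only the aliasing problem, not the real fields.
  case class Transport(timeStamp: Long, onHeapSize: Long, offHeapSize: Long)
  class Metrics { var transport: Transport = Transport(0L, 0L, 0L) }

  object AliasingSketch extends App {
    val latest = mutable.Map[String, Metrics]()
    val modified = mutable.Map[String, Metrics]()

    val m = new Metrics
    latest("exec-1") = m
    modified("exec-1") = m // same instance aliased by both maps

    // Mutating through one map silently changes what the other would log.
    modified("exec-1").transport = Transport(1L, 30L, 10L)
    assert(latest("exec-1").transport.onHeapSize == 30L) // latest corrupted too

    // The patch's fix: copy into a fresh instance before updating the map.
    val copy = new Metrics
    copy.transport = m.transport
    modified("exec-1") = copy // later updates no longer leak back into latest
  }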
@@ -122,6 +122,105 @@ class EventLoggingListenerSuite extends SparkFunSuite with LocalSparkContext with

      "a fine:mind$dollar{bills}.1", None, Some("lz4")))
  }
  test("test event logger logging executor metrics") {
    import org.apache.spark.scheduler.cluster._
    import org.apache.spark.ui.memory._
    val conf = EventLoggingListenerSuite.getLoggingConf(testDirPath)
    val eventLogger = new EventLoggingListener("test-memListener", None, testDirPath.toUri(), conf)
    val execId = "exec-1"
    val hostName = "host-1"

    eventLogger.start()
    eventLogger.onExecutorAdded(SparkListenerExecutorAdded(
      0L, execId, new ExecutorInfo(hostName, 1, Map.empty)))

    // stage 1 and stage 2 submitted
    eventLogger.onStageSubmitted(MemoryListenerSuite.createStageStartEvent(1))
    eventLogger.onStageSubmitted(MemoryListenerSuite.createStageStartEvent(2))
    val execMetrics1 = MemoryListenerSuite.createExecutorMetrics(hostName, 1L, 20, 10)
    eventLogger.onExecutorMetricsUpdate(MemoryListenerSuite.createExecutorMetricsUpdateEvent(
      execId, execMetrics1))
    val execMetrics2 = MemoryListenerSuite.createExecutorMetrics(hostName, 2L, 30, 10)
    eventLogger.onExecutorMetricsUpdate(MemoryListenerSuite.createExecutorMetricsUpdateEvent(
      execId, execMetrics2))
    // stage 1 completed
    eventLogger.onStageCompleted(MemoryListenerSuite.createStageEndEvent(1))
    // stage 3 submitted
    eventLogger.onStageSubmitted(MemoryListenerSuite.createStageStartEvent(3))
    val execMetrics3 = MemoryListenerSuite.createExecutorMetrics(hostName, 3L, 30, 30)
    eventLogger.onExecutorMetricsUpdate(MemoryListenerSuite.createExecutorMetricsUpdateEvent(
      execId, execMetrics3))
    val execMetrics4 = MemoryListenerSuite.createExecutorMetrics(hostName, 4L, 20, 25)
    eventLogger.onExecutorMetricsUpdate(MemoryListenerSuite.createExecutorMetricsUpdateEvent(
      execId, execMetrics4))
    // stage 2 completed
    eventLogger.onStageCompleted(MemoryListenerSuite.createStageEndEvent(2))
    val execMetrics5 = MemoryListenerSuite.createExecutorMetrics(hostName, 5L, 15, 15)
    eventLogger.onExecutorMetricsUpdate(MemoryListenerSuite.createExecutorMetricsUpdateEvent(
      execId, execMetrics5))
    val execMetrics6 = MemoryListenerSuite.createExecutorMetrics(hostName, 6L, 25, 10)
    eventLogger.onExecutorMetricsUpdate(MemoryListenerSuite.createExecutorMetricsUpdateEvent(
      execId, execMetrics6))
    // stage 3 completed
    eventLogger.onStageCompleted(MemoryListenerSuite.createStageEndEvent(3))

    eventLogger.onExecutorRemoved(SparkListenerExecutorRemoved(7L, execId, ""))
    // In total there are 15 logged events:
    // 2 events for executor added/removed
    // 6 events for stage submitted/completed
    // 7 executor metrics update events (3 combined metrics and 4 original metrics)
    assert(eventLogger.loggedEvents.size === 15)
    eventLogger.stop()

    val logData = EventLoggingListener.openEventLog(new Path(eventLogger.logPath), fileSystem)
    val lines = readLines(logData)
    Utils.tryWithSafeFinally {
      // totally there are 15 lines, including SparkListenerLogStart event and 14 other events
Contributor: The comment is wrong (off by one). Maybe just make the comment "one extra line for SparkListenerLogStart".
Contributor (author): Yes, forgot to update the comment, thanks.
      assert(lines.size === 16)
      // 4 executor metrics that are the latest metrics updated before stage submit and complete
      val jsonMetrics = JsonProtocol.sparkEventFromJson(parse(lines(5)))
Contributor: I'm not a fan of pulling out very specific lines of the log here -- it makes it harder for the reader to follow, and it also makes the tests more brittle. Could you instead have a util method like

  val parsedLines = lines.map { line => JsonProtocol.sparkEventFromJson(parse(line)) }
  ...
  checkExecutorMetrics(
    metrics = getLastExecutorMetricBeforeStageEnd(parsedLines, 3),
    expMetrics = ...
  )

(not quite the right args, but hopefully that conveys the idea). You'd also need to make sure the stage end events had a completion time in there to be able to grab the right event.
Contributor: Another idea: write an integration test, which pumps the parsed events back into the …
Contributor (author): Good idea, I'll add an integration test, make it cleaner.
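For concreteness, such a helper could look like the following. parseEvents and getLastExecutorMetricBeforeStageEnd are hypothetical names taken from the review comment, not existing Spark test utilities, and the sketch assumes this PR's three-argument SparkListenerExecutorMetricsUpdate:

  // Hypothetical helpers following the review suggestion; names and
  // signatures are illustrative only.
  def parseEvents(lines: Seq[String]): Seq[SparkListenerEvent] =
    lines.map { line => JsonProtocol.sparkEventFromJson(parse(line)) }

  // Return the last executor-metrics update logged before the completion
  // event of the given stage.
  def getLastExecutorMetricBeforeStageEnd(
      events: Seq[SparkListenerEvent],
      stageId: Int): SparkListenerExecutorMetricsUpdate = {
    val stageEndIndex = events.indexWhere {
      case e: SparkListenerStageCompleted => e.stageInfo.stageId == stageId
      case _ => false
    }
    events.take(stageEndIndex).reverse.collectFirst {
      case e: SparkListenerExecutorMetricsUpdate => e
    }.get
  }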
      assert(Utils.getFormattedClassName(jsonMetrics) === Utils.getFormattedClassName(
        SparkListenerExecutorMetricsUpdate))
      val jsonMetrics2 = jsonMetrics.asInstanceOf[SparkListenerExecutorMetricsUpdate]
      assert((execId, (hostName, 2L, 30, 10)) === (jsonMetrics2.execId, jsonMetrics2
        .executorMetrics.metricsDetails))

      val jsonMetrics4 = JsonProtocol.sparkEventFromJson(parse(lines(7)))
        .asInstanceOf[SparkListenerExecutorMetricsUpdate]
      val jsonMetrics6 = JsonProtocol.sparkEventFromJson(parse(lines(10)))
        .asInstanceOf[SparkListenerExecutorMetricsUpdate]
      val jsonMetrics8 = JsonProtocol.sparkEventFromJson(parse(lines(13)))
        .asInstanceOf[SparkListenerExecutorMetricsUpdate]
      assert((execId, (hostName, 2L, 30, 10)) === (jsonMetrics4.execId, jsonMetrics4
        .executorMetrics.metricsDetails))
      assert((execId, (hostName, 4L, 20, 25)) === (jsonMetrics6.execId, jsonMetrics6
        .executorMetrics.metricsDetails))
      assert((execId, (hostName, 6L, 25, 10)) === (jsonMetrics8.execId, jsonMetrics8
        .executorMetrics.metricsDetails))
      // 3 executor metrics that are combined metrics updated during each time segment.
      // There is no combined metrics entry before "jsonMetrics4" (lines(7)) because there is
      // no metrics update between stage 1 complete and stage 3 submit, so only the last
      // metrics update will be logged.
      val jsonMetrics1 = JsonProtocol.sparkEventFromJson(parse(lines(4)))
        .asInstanceOf[SparkListenerExecutorMetricsUpdate]
      val jsonMetrics5 = JsonProtocol.sparkEventFromJson(parse(lines(9)))
        .asInstanceOf[SparkListenerExecutorMetricsUpdate]
      val jsonMetrics7 = JsonProtocol.sparkEventFromJson(parse(lines(12)))
        .asInstanceOf[SparkListenerExecutorMetricsUpdate]
      assert((execId, (hostName, 2L, 30, 10)) === (jsonMetrics1.execId, jsonMetrics1
        .executorMetrics.metricsDetails))
      assert((execId, (hostName, 3L, 30, 30)) === (jsonMetrics5.execId, jsonMetrics5
        .executorMetrics.metricsDetails))
      assert((execId, (hostName, 6L, 25, 15)) === (jsonMetrics7.execId, jsonMetrics7
        .executorMetrics.metricsDetails))
    } {
      logData.close()
    }
  }
  /* ----------------- *
   * Actual test logic *
   * ----------------- */
Contributor: I'd rename this to updateAndLogExecutorMemoryMetrics or something like that, to be a little more specific. I'd also change the first sentence of the comment to something like … . I don't understand the last two sentences of the comment -- can you expand on that?
Finally, you should use foreach, and you can use case to extract the parts you want and make it a little clearer:

  modifiedMetrics.foreach { case (_, metrics) => logEvent(metrics) }
  latestMetrics.foreach { case (executorId, metrics) => modifiedMetrics.update(executorId, metrics) }

Contributor (author): I'll update the code according to the design doc; I think the current code is not quite correct. Please refer to the design doc.