36 commits
c8e8abe
SPARK-23429: Add executor memory metrics to heartbeat and expose in e…
edwinalu Mar 9, 2018
5d6ae1c
modify MimaExcludes.scala to filter changes to SparkListenerExecutorM…
edwinalu Apr 2, 2018
ad10d28
Address code review comments, change event logging to stage end.
edwinalu Apr 22, 2018
10ed328
Add configuration parameter spark.eventLog.logExecutorMetricsUpdates.…
edwinalu May 15, 2018
2d20367
wip on enum based metrics
squito May 23, 2018
f904f1e
wip ... has both enum and non-enum version
squito May 23, 2018
c502ec4
case objects, mostly complete
squito May 23, 2018
7879e66
Merge pull request #1 from squito/metric_enums
edwinalu Jun 3, 2018
2662f6f
Address comments (move heartbeater from DAGScheduler to SparkContext,…
edwinalu Jun 10, 2018
2871335
SPARK-23429: Add executor memory metrics to heartbeat and expose in e…
edwinalu Mar 9, 2018
da83f2e
modify MimaExcludes.scala to filter changes to SparkListenerExecutorM…
edwinalu Apr 2, 2018
f25a44b
Address code review comments, change event logging to stage end.
edwinalu Apr 22, 2018
ca85c82
Add configuration parameter spark.eventLog.logExecutorMetricsUpdates.…
edwinalu May 15, 2018
8b74ba8
wip on enum based metrics
squito May 23, 2018
036148c
wip ... has both enum and non-enum version
squito May 23, 2018
91fb1db
case objects, mostly complete
squito May 23, 2018
2d8894a
Address comments (move heartbeater from DAGScheduler to SparkContext,…
edwinalu Jun 10, 2018
99044e6
Merge branch 'SPARK-23429.2' of https://github.com/edwinalu/spark int…
edwinalu Jun 14, 2018
263c8c8
code review comments
edwinalu Jun 14, 2018
812fdcf
code review comments:
edwinalu Jun 22, 2018
7ed42a5
Address code review comments. Also make executorUpdates in SparkListe…
edwinalu Jun 28, 2018
8d9acdf
Revert and make executorUpdates in SparkListenerExecutorMetricsUpdate…
edwinalu Jun 29, 2018
20799d2
code review comments: hid array implementation of executor metrics, a…
edwinalu Jul 25, 2018
8905d23
merge with master
edwinalu Jul 25, 2018
04875b8
Integration of ProcessTreeMetrics with PR 21221
Jul 26, 2018
a0eed11
address code review comments
edwinalu Aug 5, 2018
162b9b2
Merge branch 'SPARK-23429.2' of https://github.com/edwinalu/spark int…
Aug 6, 2018
29a44c7
Changing the position of ptree and also make the computation configur…
Aug 7, 2018
3671427
Seperate metrics for jvm, python and others and update the tests
Aug 8, 2018
03cd5bc
code review comments
edwinalu Aug 13, 2018
c79b5ab
Merge branch 'SPARK-23429.2' of https://github.com/edwinalu/spark int…
Aug 14, 2018
10e7f15
Merge branch 'master' into SPARK-23429.2
edwinalu Aug 14, 2018
a14b82a
merge conflicts
edwinalu Aug 14, 2018
2897281
disable stage executor metrics logging by default
edwinalu Aug 16, 2018
8f97b50
Merge branch 'SPARK-23429.2' of https://github.com/rezasafi/spark int…
Aug 17, 2018
b14cebc
Update JsonProtocolSuite with new metrics.
Aug 17, 2018
code review comments
edwinalu committed Jun 17, 2018
commit 263c8c846265b6bdfdce471e44c163ab85b930a3
5 changes: 0 additions & 5 deletions core/src/main/scala/org/apache/spark/SparkContext.scala
@@ -305,11 +305,6 @@ class SparkContext(config: SparkConf) extends Logging {
     _dagScheduler = ds
   }

-  private[spark] def heartbeater: Heartbeater = _heartbeater
-  private[spark] def heartbeater_=(hb: Heartbeater): Unit = {
-    _heartbeater = hb
-  }
-
   /**
    * A unique identifier for the Spark application.
    * Its format depends on the scheduler implementation.
core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala
@@ -1753,7 +1753,7 @@ class DAGScheduler(
     messageScheduler.shutdownNow()
     eventProcessLoop.stop()
     taskScheduler.stop()
-  }
+  }

   eventProcessLoop.start()
 }
core/src/main/scala/org/apache/spark/scheduler/EventLoggingListener.scala
@@ -96,7 +96,7 @@ private[spark] class EventLoggingListener(
   // Visible for tests only.
   private[scheduler] val logPath = getLogPath(logBaseDir, appId, appAttemptId, compressionCodecName)

-  // map of live stages, to peak executor metrics for the stage
+  // map of (stageId, stageAttempt), to peak executor metrics for the stage
   private val liveStageExecutorMetrics = HashMap[(Int, Int), HashMap[String, PeakExecutorMetrics]]()

   /**
@@ -197,6 +197,9 @@ private[spark] class EventLoggingListener(
       executorMap.foreach {
         executorEntry => {
           for ((executorId, peakExecutorMetrics) <- executorEntry) {
+            // -1 timestamp indicates that the ExecutorMetricsUpdate event is being read from the
+            // history log, and contains the peak metrics for the stage whose StageCompleted event
+            // immediately follows
             val executorMetrics = new ExecutorMetrics(-1, peakExecutorMetrics.metrics)
             val executorUpdate = new SparkListenerExecutorMetricsUpdate(
               executorId, accumUpdates, Some(executorMetrics))
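For readers following along, the logging pattern in this file can be summarized in a short standalone sketch: peaks are tracked per (stageId, stageAttempt) while a stage is live, and flushed as a synthesized ExecutorMetricsUpdate (timestamp -1) just before the stage's StageCompleted event is written. This is an illustrative sketch under stated assumptions, not the PR's exact code: `compareAndUpdate`, the 10-slot metrics array, and the `println` stand-in for event logging are all assumptions.

```scala
import scala.collection.mutable.HashMap

// Illustrative stand-ins for the PR's ExecutorMetrics / PeakExecutorMetrics types.
case class ExecutorMetrics(timestamp: Long, metrics: Array[Long])

class PeakExecutorMetrics {
  val metrics = new Array[Long](10)
  // Keep the per-metric maximum seen while the stage runs (assumed helper).
  def compareAndUpdate(update: Array[Long]): Unit =
    for (i <- metrics.indices) metrics(i) = math.max(metrics(i), update(i))
}

class StagePeakTracker {
  // (stageId, stageAttempt) -> executorId -> peak metrics, keyed as in the diff above.
  private val liveStageExecutorMetrics =
    HashMap[(Int, Int), HashMap[String, PeakExecutorMetrics]]()

  def onStageSubmitted(stageId: Int, attempt: Int): Unit =
    liveStageExecutorMetrics((stageId, attempt)) =
      HashMap.empty[String, PeakExecutorMetrics]

  def onExecutorMetricsUpdate(executorId: String, update: Array[Long]): Unit =
    // Fold the update into every live stage: an executor may be running
    // tasks for several concurrent stages at once.
    for ((_, executors) <- liveStageExecutorMetrics) {
      executors.getOrElseUpdate(executorId, new PeakExecutorMetrics)
        .compareAndUpdate(update)
    }

  def onStageCompleted(stageId: Int, attempt: Int): Unit =
    for (executors <- liveStageExecutorMetrics.remove((stageId, attempt));
         (executorId, peaks) <- executors) {
      // The -1 timestamp marks this as a peak record synthesized at stage end.
      val synthesized = ExecutorMetrics(-1, peaks.metrics)
      println(s"log for executor $executorId: timestamp=${synthesized.timestamp}, " +
        s"peaks=${synthesized.metrics.mkString(",")}")
    }
}
```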
16 changes: 6 additions & 10 deletions core/src/main/scala/org/apache/spark/status/api/v1/api.scala
@@ -23,6 +23,7 @@ import scala.xml.{NodeSeq, Text}

 import com.fasterxml.jackson.annotation.JsonIgnoreProperties
 import com.fasterxml.jackson.core.{JsonGenerator, JsonParser}
+import com.fasterxml.jackson.core.`type`.TypeReference
 import com.fasterxml.jackson.databind.{DeserializationContext, JsonDeserializer, JsonSerializer, SerializerProvider}
 import com.fasterxml.jackson.databind.annotation.{JsonDeserialize, JsonSerialize}

@@ -113,26 +114,21 @@ class MemoryMetrics private[spark](
     val totalOffHeapStorageMemory: Long)

 /** deserializer for peakMemoryMetrics: convert to array ordered by metric name */
-class PeakMemoryMetricsDeserializer extends JsonDeserializer[Option[Array[Long]]] {
+class PeakMemoryMetricsDeserializer private[spark] extends JsonDeserializer[Option[Array[Long]]] {
   override def deserialize(
       jsonParser: JsonParser,
       deserializationContext: DeserializationContext): Option[Array[Long]] = {
-    val metricsMap = jsonParser.readValueAs(classOf[Option[Map[String, Object]]])
+    val metricsMap = jsonParser.readValueAs[Option[Map[String, Long]]](
+      new TypeReference[Option[Map[String, java.lang.Long]]] {})
     metricsMap match {
       case Some(metrics) =>
-        Some(MetricGetter.values.map { m =>
-          metrics.getOrElse (m.name, 0L) match {
-            case intVal: Int => intVal.toLong
-            case longVal: Long => longVal
-          }
-        }.toArray)
+        Some(MetricGetter.values.map(m => metrics.getOrElse(m.name, 0L)).toArray)
       case None => None
     }
   }
 }

 /** serializer for peakMemoryMetrics: convert array to map with metric name as key */
-class PeakMemoryMetricsSerializer extends JsonSerializer[Option[Array[Long]]] {
+class PeakMemoryMetricsSerializer private[spark] extends JsonSerializer[Option[Array[Long]]] {
   override def serialize(
       metrics: Option[Array[Long]],
       jsonGenerator: JsonGenerator,
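A note on the deserializer change above: reading the JSON map through a `TypeReference` lets Jackson produce `Long` values directly, which is what makes the old `Int`-vs-`Long` pattern match unnecessary. A minimal standalone sketch of the same idea (assuming `jackson-module-scala` is on the classpath; the metric names are placeholders, not the real `MetricGetter.values` list):

```scala
import com.fasterxml.jackson.core.`type`.TypeReference
import com.fasterxml.jackson.databind.ObjectMapper
import com.fasterxml.jackson.module.scala.DefaultScalaModule

object PeakMetricsJson {
  // Placeholder metric names standing in for MetricGetter.values.
  private val metricNames =
    Seq("JVMHeapMemory", "OnHeapExecutionMemory", "OffHeapExecutionMemory")

  private val mapper = new ObjectMapper().registerModule(DefaultScalaModule)

  def main(args: Array[String]): Unit = {
    val json = """{"JVMHeapMemory": 4000, "OnHeapExecutionMemory": 60}"""
    // The TypeReference keeps the numeric values as java.lang.Long, so no
    // Int-vs-Long pattern match is needed after reading.
    val metrics: Map[String, java.lang.Long] =
      mapper.readValue(json, new TypeReference[Map[String, java.lang.Long]] {})
    // Metrics absent from the JSON default to 0, mirroring getOrElse(m.name, 0L).
    val asArray = metricNames.map(m => metrics.get(m).map(_.longValue).getOrElse(0L)).toArray
    println(asArray.mkString(", ")) // prints: 4000, 60, 0
  }
}
```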
core/src/test/scala/org/apache/spark/scheduler/EventLoggingListenerSuite.scala
@@ -21,7 +21,7 @@ import java.io.{File, FileOutputStream, InputStream, IOException}

 import scala.collection.immutable.Map
 import scala.collection.mutable
-import scala.collection.mutable.ArrayBuffer
+import scala.collection.mutable.Set
 import scala.io.Source

 import org.apache.hadoop.fs.Path
@@ -301,48 +301,66 @@ class EventLoggingListenerSuite extends SparkFunSuite with LocalSparkContext wit
       createExecutorAddedEvent(1),
       createExecutorAddedEvent(2),
       createStageSubmittedEvent(0),
+      // receive 3 metric updates from each executor with just stage 0 running,
+      // with different peak updates for each executor
       createExecutorMetricsUpdateEvent(1,
         new ExecutorMetrics(10L,
           Array(4000L, 50L, 20L, 0L, 40L, 0L, 60L, 0L, 70L, 20L))),
       createExecutorMetricsUpdateEvent(2,
         new ExecutorMetrics(10L,
           Array(1500L, 50L, 20L, 0L, 0L, 0L, 20L, 0L, 70L, 0L))),
+      // exec 1: new stage 0 peaks for metrics at indexes: 2, 4, 6
       createExecutorMetricsUpdateEvent(1,
         new ExecutorMetrics(15L,
           Array(4000L, 50L, 50L, 0L, 50L, 0L, 100L, 0L, 70L, 20L))),
+      // exec 2: new stage 0 peaks for metrics at indexes: 0, 4, 6
       createExecutorMetricsUpdateEvent(2,
         new ExecutorMetrics(15L,
           Array(2000L, 50L, 10L, 0L, 10L, 0L, 30L, 0L, 70L, 0L))),
+      // exec 1: new stage 0 peaks for metrics at indexes: 5, 7
       createExecutorMetricsUpdateEvent(1,
         new ExecutorMetrics(20L,
           Array(2000L, 40L, 50L, 0L, 40L, 10L, 90L, 10L, 50L, 0L))),
+      // exec 2: new stage 0 peaks for metrics at indexes: 0, 5, 6, 7, 8
       createExecutorMetricsUpdateEvent(2,
         new ExecutorMetrics(20L,
           Array(3500L, 50L, 15L, 0L, 10L, 10L, 35L, 10L, 80L, 0L))),
+      // now start stage 1, one more metric update for each executor, and new
+      // peaks for some stage 1 metrics (as listed), initialize stage 1 peaks
       createStageSubmittedEvent(1),
+      // exec 1: new stage 0 peaks for metrics at indexes: 0, 3, 7
       createExecutorMetricsUpdateEvent(1,
         new ExecutorMetrics(25L,
           Array(5000L, 30L, 50L, 20L, 30L, 10L, 80L, 30L, 50L, 0L))),
+      // exec 2: new stage 0 peaks for metrics at indexes: 0, 1, 2, 3, 6, 7, 9
       createExecutorMetricsUpdateEvent(2,
         new ExecutorMetrics(25L,
           Array(7000L, 70L, 50L, 20L, 0L, 10L, 50L, 30L, 10L, 40L))),
+      // complete stage 0, and 3 more updates for each executor with just
+      // stage 1 running
       createStageCompletedEvent(0),
+      // exec 1: new stage 1 peaks for metrics at indexes: 0, 1, 3
       createExecutorMetricsUpdateEvent(1,
         new ExecutorMetrics(30L,
           Array(6000L, 70L, 20L, 30L, 10L, 0L, 30L, 30L, 30L, 0L))),
+      // exec 2: new stage 1 peaks for metrics at indexes: 3, 4, 7, 8
       createExecutorMetricsUpdateEvent(2,
         new ExecutorMetrics(30L,
           Array(5500L, 30L, 20L, 40L, 10L, 0L, 30L, 40L, 40L, 20L))),
+      // exec 1: new stage 1 peaks for metrics at indexes: 0, 4, 5, 7
       createExecutorMetricsUpdateEvent(1,
         new ExecutorMetrics(35L,
           Array(7000L, 70L, 5L, 25L, 60L, 30L, 65L, 55L, 30L, 0L))),
+      // exec 2: new stage 1 peak for metrics at index: 7
       createExecutorMetricsUpdateEvent(2,
         new ExecutorMetrics(35L,
           Array(5500L, 40L, 25L, 30L, 10L, 30L, 35L, 60L, 0L, 20L))),
+      // exec 1: no new stage 1 peaks
       createExecutorMetricsUpdateEvent(1,
         new ExecutorMetrics(40L,
           Array(5500L, 70L, 15L, 20L, 55L, 20L, 70L, 40L, 20L, 0L))),
       createExecutorRemovedEvent(1),
+      // exec 2: new stage 1 peak for metrics at index: 6
       createExecutorMetricsUpdateEvent(2,
         new ExecutorMetrics(40L,
           Array(4000L, 20L, 25L, 30L, 10L, 30L, 35L, 60L, 0L, 0L))),
@@ -373,11 +391,14 @@ class EventLoggingListenerSuite extends SparkFunSuite with LocalSparkContext wit
         event match {
           case metricsUpdate: SparkListenerExecutorMetricsUpdate =>
           case stageCompleted: SparkListenerStageCompleted =>
+            val execIds = Set[String]()
             (1 to 2).foreach { _ =>
-              checkExecutorMetricsUpdate(lines(logIdx), stageCompleted.stageInfo.stageId,
-                expectedMetricsEvents)
+              val execId = checkExecutorMetricsUpdate(lines(logIdx),
+                stageCompleted.stageInfo.stageId, expectedMetricsEvents)
+              execIds += execId
               logIdx += 1
             }
+            assert(execIds.size == 2) // check that each executor was logged
             checkEvent(lines(logIdx), event)
             logIdx += 1
           case _ =>
@@ -462,16 +483,17 @@ class EventLoggingListenerSuite extends SparkFunSuite with LocalSparkContext wit
   private def checkExecutorMetricsUpdate(
       line: String,
       stageId: Int,
-      expectedEvents: Map[(Int, String), SparkListenerExecutorMetricsUpdate]): Unit = {
+      expectedEvents: Map[(Int, String), SparkListenerExecutorMetricsUpdate]): String = {
     JsonProtocol.sparkEventFromJson(parse(line)) match {
       case executorMetrics: SparkListenerExecutorMetricsUpdate =>
         expectedEvents.get((stageId, executorMetrics.execId)) match {
           case Some(expectedMetrics) =>
             assert(executorMetrics.accumUpdates.isEmpty)
             checkExecutorMetrics(executorMetrics.executorUpdates, expectedMetrics.executorUpdates)
-          case None =>
+          case None =>
+            assert(false)
         }
+        executorMetrics.execId
       case _ =>
         fail("expecting SparkListenerExecutorMetricsUpdate")
     }
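As an aside, the "new stage N peaks for metrics at indexes: …" comments added above can be checked mechanically: an index is a new peak exactly when that update's value strictly exceeds the running per-metric maximum. A tiny self-contained check of the first such comment (arrays copied from the test events above; the helper itself is illustrative, not part of the suite):

```scala
object PeakIndexCheck {
  // Indexes where `update` raises the running peak.
  def newPeakIndexes(peak: Array[Long], update: Array[Long]): Seq[Int] =
    peak.indices.filter(i => update(i) > peak(i))

  def main(args: Array[String]): Unit = {
    // exec 1's first two stage-0 updates (timestamps 10L and 15L above).
    val first  = Array(4000L, 50L, 20L, 0L, 40L, 0L, 60L, 0L, 70L, 20L)
    val second = Array(4000L, 50L, 50L, 0L, 50L, 0L, 100L, 0L, 70L, 20L)
    println(newPeakIndexes(first, second)) // Vector(2, 4, 6), matching the comment
  }
}
```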