27 commits (the diff below shows the changes from 1 commit)
3f8321a
Integration of ProcessTreeMetrics with PR 21221
Jul 26, 2018
cd16a75
Changing the position of ptree and also make the computation configur…
Aug 7, 2018
94c2b04
Separate metrics for jvm, python and others and update the tests
Aug 8, 2018
062f5d7
Update JsonProtocolSuite
Sep 25, 2018
245221d
[SPARK-24958] Add executors' process tree total memory information to…
Oct 2, 2018
c72be03
Addressing most of Imran's comments
Oct 3, 2018
8f3c938
Fixing the scala style and some minor comments
Oct 3, 2018
f2dca27
Removing types from the definitions wherever possible
Oct 4, 2018
a9f924c
Using Utils methods when possible or use ProcessBuilder
Oct 5, 2018
a11e3a2
make use of Utils.trywithresources
Oct 5, 2018
34ad625
Changing ExecutorMetricType and ExecutorMetrics to use a map instead o…
Oct 9, 2018
415f976
Changing ExecutorMetric to use array instead of a map
Oct 10, 2018
067b81d
A small cosmetic change
Oct 10, 2018
18ee4ad
Merge branch 'master' of https://github.com/apache/spark into ptreeme…
Oct 17, 2018
7f7ed2b
Applying latest review comments. Using Arrays instead of Map for ret…
Oct 23, 2018
f3867ff
Merge branch 'master' of https://github.com/apache/spark into ptreeme…
Nov 5, 2018
0f8f3e2
Fix an issue with JsonProtocolSuite
Nov 5, 2018
ea08c61
Fix scalastyle issue
Nov 5, 2018
8f20857
Applying latest review comments
Nov 14, 2018
6e65360
Using the companion object and other stuff
Nov 27, 2018
4659f4a
Update the use of process builder and applying other review comments
Nov 28, 2018
ef4be38
Small style fixes based on reviews
Nov 30, 2018
805741c
Applying review comments, mostly style related
Nov 30, 2018
4c1f073
Remove the unnecessary tryWithResources
Nov 30, 2018
0a7402e
Applying the comment about error handling and some more style fixes
Dec 4, 2018
3d65b35
Removing a return
Dec 6, 2018
6eab315
Reordering of info in a test resource file to avoid confusion
Dec 6, 2018
Applying latest review comments
Reza Safi committed Nov 14, 2018
commit 8f208574a293a94d9029cc4adb45b03e7a67ed47
4 changes: 2 additions & 2 deletions core/src/main/scala/org/apache/spark/Heartbeater.scala
@@ -63,10 +63,10 @@ private[spark] class Heartbeater(
* Get the current executor level metrics. These are returned as an array
*/
def getCurrentMetrics(): ExecutorMetrics = {
val metrics = new Array[Long](ExecutorMetricType.numberOfMetrics)
val metrics = new Array[Long](ExecutorMetricType.numMetrics)
var offset = 0
ExecutorMetricType.metricGetters.foreach { metric =>
val newSetOfMetrics = metric.getMetricSet(memoryManager)
val newSetOfMetrics = metric.getMetricValues(memoryManager)
Array.copy(newSetOfMetrics, 0, metrics, offset, newSetOfMetrics.size)
offset += newSetOfMetrics.length
}
@@ -28,14 +28,14 @@ import org.apache.spark.metrics.ExecutorMetricType
@DeveloperApi
class ExecutorMetrics private[spark] extends Serializable {

private val metrics = new Array[Long](ExecutorMetricType.numberOfMetrics)
private val metrics = new Array[Long](ExecutorMetricType.numMetrics)
// the first element is initialized to -1, indicating that the values for the array
// haven't been set yet.
metrics(0) = -1

/** Returns the value for the specified metric. */
def getMetricValue(metricName: String): Long = {
metrics(ExecutorMetricType.definedMetricsAndOffset.get(metricName).get)
metrics(ExecutorMetricType.metricToOffset.get(metricName).get)
}

/** Returns true if the values for the metrics have been set, false otherwise. */
@@ -53,7 +53,7 @@ class ExecutorMetrics private[spark] extends Serializable {
*/
private[spark] def this(executorMetrics: Map[String, Long]) {
this()
ExecutorMetricType.definedMetricsAndOffset.map { m =>
ExecutorMetricType.metricToOffset.map { m =>
metrics(m._2) = executorMetrics.getOrElse(m._1, 0L)
}
}
@@ -67,10 +67,10 @@ class ExecutorMetrics private[spark] extends Serializable {
*/
private[spark] def compareAndUpdatePeakValues(executorMetrics: ExecutorMetrics): Boolean = {
var updated = false
ExecutorMetricType.definedMetricsAndOffset.map {m =>
if (executorMetrics.metrics(m._2) > metrics(m._2)) {
ExecutorMetricType.metricToOffset.map { case (_, idx) =>
if (executorMetrics.metrics(idx) > metrics(idx)) {
updated = true
metrics(m._2) = executorMetrics.metrics(m._2)
metrics(idx) = executorMetrics.metrics(idx)
}
}
updated
@@ -45,29 +45,33 @@ private[spark] class ProcfsBasedSystems(val procfsDir: String = "/proc/") extend
var pageSize = computePageSize()
var isAvailable: Boolean = isProcfsAvailable
private val pid = computePid()
Contributor (reviewer): pageSize is only a var for testing -- instead just optionally pass it in to the constructor. Also, I think all of these can be private.

Contributor Author: I think I can't call computePageSize() in the constructor signature to compute the default value. Another solution is to check for testing inside computePageSize and, if we are testing, assign it a value provided in the constructor (defaulting to 4096).

Contributor (reviewer): You can't put it as a default value, but if you make it a static method, then you can provide an overloaded method which uses it; see squito@cf00835. But I think your other proposal is even better: if it's testing, just give it a fixed value (no need to even make it an argument to the constructor at all).
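
A minimal sketch of the option the discussion converges on (the class name and structure below are illustrative assumptions, not the PR's final code): the page size is computed by a private helper that short-circuits to a fixed 4096 bytes when running under tests, so no extra constructor argument is needed.

import scala.sys.process.Process

// Hypothetical sketch of the "fixed value under testing" approach discussed above.
class ProcfsBasedSystemsSketch(testing: Boolean) {
  val pageSize: Long = computePageSize()

  private def computePageSize(): Long = {
    if (testing) {
      4096L // fixed page size under tests; avoids shelling out to getconf
    } else {
      // "getconf PAGESIZE" prints the page size in bytes on its first line
      val out = Process(Seq("getconf", "PAGESIZE")).!!
      out.trim.toLong
    }
  }
}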

private var ptree = mutable.Map[ Int, Set[Int]]()

var allMetrics: ProcfsBasedSystemsMetrics = ProcfsBasedSystemsMetrics(0, 0, 0, 0, 0, 0)
// var allMetrics: ProcfsBasedSystemsMetrics = ProcfsBasedSystemsMetrics(0, 0, 0, 0, 0, 0)

computeProcessTree()

private def isProcfsAvailable: Boolean = {
private lazy val isProcfsAvailable: Boolean = {
if (testing) {
return true
true
}
try {
if (!Files.exists(Paths.get(procfsDir))) {
return false
else {
var procDirExists = true
try {
if (!Files.exists(Paths.get(procfsDir))) {
procDirExists = false
}
}
catch {
case f: IOException =>
logWarning("It seems that procfs isn't supported", f)
procDirExists = false
}
val shouldLogStageExecutorMetrics =
SparkEnv.get.conf.get(config.EVENT_LOG_STAGE_EXECUTOR_METRICS)
val shouldLogStageExecutorProcessTreeMetrics =
SparkEnv.get.conf.get(config.EVENT_LOG_PROCESS_TREE_METRICS)
procDirExists && shouldLogStageExecutorProcessTreeMetrics && shouldLogStageExecutorMetrics
}
catch {
case f: FileNotFoundException => return false
}
val shouldLogStageExecutorMetrics =
SparkEnv.get.conf.get(config.EVENT_LOG_STAGE_EXECUTOR_METRICS)
val shouldLogStageExecutorProcessTreeMetrics =
SparkEnv.get.conf.get(config.EVENT_LOG_PROCESS_TREE_METRICS)
shouldLogStageExecutorProcessTreeMetrics && shouldLogStageExecutorMetrics
}

private def computePid(): Int = {
@@ -78,13 +82,13 @@ private[spark] class ProcfsBasedSystems(val procfsDir: String = "/proc/") extend
// This can be simplified in java9:
// https://docs.oracle.com/javase/9/docs/api/java/lang/ProcessHandle.html
val cmd = Array("bash", "-c", "echo $PPID")
val length = 10
val out2 = Utils.executeAndGetOutput(cmd)
Contributor (reviewer): can be out instead of out2

val pid = Integer.parseInt(out2.split("\n")(0))
return pid;
}
catch {
case e: SparkException => logWarning("Exception when trying to compute process tree." +
case e: SparkException =>
logWarning("Exception when trying to compute process tree." +
" As a result reporting of ProcessTree metrics is stopped", e)
isAvailable = false
return -1
@@ -97,8 +101,8 @@ private[spark] class ProcfsBasedSystems(val procfsDir: String = "/proc/") extend
}
try {
val cmd = Array("getconf", "PAGESIZE")
val out2 = Utils.executeAndGetOutput(cmd)
return Integer.parseInt(out2.split("\n")(0))
val out = Utils.executeAndGetOutput(cmd)
return Integer.parseInt(out.split("\n")(0))
} catch {
case e: Exception => logWarning("Exception when trying to compute pagesize, as a" +
" result reporting of ProcessTree metrics is stopped")
@@ -107,24 +111,23 @@ private[spark] class ProcfsBasedSystems(val procfsDir: String = "/proc/") extend
}
}

private def computeProcessTree(): Unit = {
private def computeProcessTree(): Set[Int] = {
if (!isAvailable || testing) {
return
return Set()
}
ptree = mutable.Map[ Int, Set[Int]]()
var ptree: Set[Int] = Set()
ptree += pid
val queue = mutable.Queue.empty[Int]
queue += pid
while( !queue.isEmpty ) {
val p = queue.dequeue()
val c = getChildPids(p)
if(!c.isEmpty) {
queue ++= c
ptree += (p -> c.toSet)
}
else {
ptree += (p -> Set[Int]())
ptree ++= c.toSet
}
}
ptree
}

private def getChildPids(pid: Int): ArrayBuffer[Int] = {
@@ -162,15 +165,17 @@ private[spark] class ProcfsBasedSystems(val procfsDir: String = "/proc/") extend
}
}

def computeProcessInfo(pid: Int): Unit = {
/*
def computeProcessInfo(allMetrics: ProcfsBasedSystemsMetrics, pid: Int):
ProcfsBasedSystemsMetrics = {
/*
* Hadoop ProcfsBasedProcessTree class used regex and pattern matching to retrive the memory
* info. I tried that but found it not correct during tests, so I used normal string analysis
* instead. The computation of RSS and Vmem are based on proc(5):
* http://man7.org/linux/man-pages/man5/proc.5.html
*/
try {
val pidDir = new File(procfsDir, pid.toString)
var allMetricsUpdated = ProcfsBasedSystemsMetrics(0, 0, 0, 0, 0, 0)
Utils.tryWithResource( new InputStreamReader(
new FileInputStream(
new File(pidDir, procfsStatFile)), Charset.forName("UTF-8"))) { fReader =>
@@ -181,41 +186,42 @@ private[spark] class ProcfsBasedSystems(val procfsDir: String = "/proc/") extend
val vmem = procInfoSplit(22).toLong
val rssPages = procInfoSplit(23).toLong
if (procInfoSplit(1).toLowerCase(Locale.US).contains("java")) {
Contributor (reviewer): Could this just be vmem and rssPages, rather than splitting into JVM, Python, and other? Can you explain more about how the separate values would be used?

Contributor Author: This is separated because knowing the main actors, like the JVM, individually turns out to have some value for the user. We just consider the JVM (the pure Scala case) and Python (the PySpark case). Other things can be added per interest in the future, but for now we put everything else under the "Other" category.

Contributor (reviewer): @edwinalu It would be nice to have a breakdown of the total memory being consumed. It's easier to tune the parameters knowing what is consuming all the memory. For example, if your container died OOMing, it helps to know whether it was because of Python or the JVM. Also, R fits in the "other" category, so it makes sense to have all three of them as of now.

Contributor (reviewer): We don't have much PySpark ourselves, but yes, it seems useful to have the breakdown, and it's easy to sum the values for the total.
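
On the last point, a small sketch of how the per-category values sum to an overall total (the field names mirror ProcfsBasedSystemsMetrics from this PR, but the case class and helpers below are illustrative assumptions, not the PR's code):

// Sketch: per-category memory totals plus derived overall totals.
case class ProcfsMemoryBreakdownSketch(
    jvmVmemTotal: Long, jvmRSSTotal: Long,
    pythonVmemTotal: Long, pythonRSSTotal: Long,
    otherVmemTotal: Long, otherRSSTotal: Long) {
  // The overall totals are just the sums of the JVM, Python, and "other" categories.
  def totalVmem: Long = jvmVmemTotal + pythonVmemTotal + otherVmemTotal
  def totalRSS: Long = jvmRSSTotal + pythonRSSTotal + otherRSSTotal
}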

allMetrics = allMetrics.copy(
allMetricsUpdated = allMetrics.copy(
jvmVmemTotal = allMetrics.jvmVmemTotal + vmem,
jvmRSSTotal = allMetrics.jvmRSSTotal + (rssPages*pageSize)
)
}
else if (procInfoSplit(1).toLowerCase(Locale.US).contains("python")) {
allMetrics = allMetrics.copy(
allMetricsUpdated = allMetrics.copy(
pythonVmemTotal = allMetrics.pythonVmemTotal + vmem,
pythonRSSTotal = allMetrics.pythonRSSTotal + (rssPages*pageSize)
)
}
else {
allMetrics = allMetrics.copy(
allMetricsUpdated = allMetrics.copy(
otherVmemTotal = allMetrics.otherVmemTotal + vmem,
otherRSSTotal = allMetrics.otherRSSTotal + (rssPages*pageSize)
)
}
}
}
}
allMetricsUpdated
} catch {
case f: FileNotFoundException => logWarning("There was a problem with reading" +
" the stat file of the process. ", f)
ProcfsBasedSystemsMetrics(0, 0, 0, 0, 0, 0)
}
}

private[spark] def computeAllMetrics(): ProcfsBasedSystemsMetrics = {
if (!isAvailable) {
return ProcfsBasedSystemsMetrics(0, 0, 0, 0, 0, 0)
}
computeProcessTree
val pids = ptree.keySet
allMetrics = ProcfsBasedSystemsMetrics(0, 0, 0, 0, 0, 0)
val pids = computeProcessTree
var allMetrics = ProcfsBasedSystemsMetrics(0, 0, 0, 0, 0, 0)
for (p <- pids) {
computeProcessInfo(p)
allMetrics = computeProcessInfo(allMetrics, p)
}
return allMetrics
}
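
The proc(5) computation referenced in the comment inside computeProcessInfo can be illustrated with a standalone sketch (an assumption-based illustration, not the PR's code): in /proc/[pid]/stat, vsize is already in bytes while rss is reported in pages, so rss must be multiplied by the page size.

import scala.io.Source

// Sketch: read vmem and rss (both in bytes) for one pid from procfs, mirroring the
// simple whitespace split used in the PR (indices 22 and 23 are zero-based).
object ProcStatSketch {
  def vmemAndRssBytes(pid: Int, pageSize: Long, procfsDir: String = "/proc"): (Long, Long) = {
    val src = Source.fromFile(s"$procfsDir/$pid/stat")
    try {
      val fields = src.mkString.trim.split(" ")
      val vmem = fields(22).toLong           // vsize: virtual memory size in bytes
      val rss = fields(23).toLong * pageSize // rss: resident set size, reported in pages
      (vmem, rss)
    } finally {
      src.close()
    }
  }
}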
@@ -28,62 +28,50 @@ import org.apache.spark.memory.MemoryManager
* Executor metric types for executor-level metrics stored in ExecutorMetrics.
*/
sealed trait ExecutorMetricType {
private[spark] def getMetricValue(memoryManager: MemoryManager): Long = 0
private[spark] def getMetricSet(memoryManager: MemoryManager): Array[Long] = {
private[spark] def getMetricValues(memoryManager: MemoryManager): Array[Long] = {
new Array[Long](0)
}
private[spark] def names = Seq(getClass().getName().stripSuffix("$").split("""\.""").last)
private[spark] def names: Seq[String] = Seq()
}

private[spark] abstract class MemoryManagerExecutorMetricType(
f: MemoryManager => Long) extends ExecutorMetricType {
override private[spark] def getMetricSet(memoryManager: MemoryManager): Array[Long] = {
val metricAsSet = new Array[Long](names.length)
(0 until names.length ).foreach { idx =>
metricAsSet(idx) = (f(memoryManager))
}
metricAsSet
sealed trait SingleValueExecutorMetricType extends ExecutorMetricType {
override private[spark] def names = Seq(getClass().getName().
stripSuffix("$").split("""\.""").last)

override private[spark] def getMetricValues(memoryManager: MemoryManager): Array[Long] = {
val metrics = new Array[Long](1)
metrics(0) = getMetricValue(memoryManager)
metrics
}

private[spark] def getMetricValue(memoryManager: MemoryManager): Long = 0
}

private[spark] abstract class MemoryManagerExecutorMetricType(
f: MemoryManager => Long) extends SingleValueExecutorMetricType {
override private[spark] def getMetricValue(memoryManager: MemoryManager): Long = {
f(memoryManager)
}
}

private[spark] abstract class MBeanExecutorMetricType(mBeanName: String)
extends ExecutorMetricType {
extends SingleValueExecutorMetricType {
private val bean = ManagementFactory.newPlatformMXBeanProxy(
ManagementFactory.getPlatformMBeanServer,
new ObjectName(mBeanName).toString, classOf[BufferPoolMXBean])

override private[spark] def getMetricSet(memoryManager: MemoryManager): Array[Long] = {
val metricAsSet = new Array[Long](1)
metricAsSet(0) = bean.getMemoryUsed
metricAsSet
}

override private[spark] def getMetricValue(memoryManager: MemoryManager): Long = {
bean.getMemoryUsed
}
}

case object JVMHeapMemory extends ExecutorMetricType {

override private[spark] def getMetricSet(memoryManager: MemoryManager): Array[Long] = {
val metricAsSet = new Array[Long](1)
metricAsSet(0) = ( ManagementFactory.getMemoryMXBean.getHeapMemoryUsage().getUsed())
metricAsSet
}
case object JVMHeapMemory extends SingleValueExecutorMetricType {
override private[spark] def getMetricValue(memoryManager: MemoryManager): Long = {
ManagementFactory.getMemoryMXBean.getHeapMemoryUsage().getUsed()
}
}

case object JVMOffHeapMemory extends ExecutorMetricType {
override private[spark] def getMetricSet(memoryManager: MemoryManager): Array[Long] = {
val metricAsSet = new Array[ Long](1)
metricAsSet(0) = ( ManagementFactory.getMemoryMXBean.getNonHeapMemoryUsage().getUsed())
metricAsSet
}
case object JVMOffHeapMemory extends SingleValueExecutorMetricType {
override private[spark] def getMetricValue(memoryManager: MemoryManager): Long = {
ManagementFactory.getMemoryMXBean.getNonHeapMemoryUsage().getUsed()
}
@@ -97,7 +85,7 @@ case object ProcessTreeMetrics extends ExecutorMetricType {
"ProcessTreePythonRSSMemory",
"ProcessTreeOtherVMemory",
"ProcessTreeOtherRSSMemory")
override private[spark] def getMetricSet(memoryManager: MemoryManager): Array[Long] = {
override private[spark] def getMetricValues(memoryManager: MemoryManager): Array[Long] = {
val allMetrics = ExecutorMetricType.pTreeInfo.computeAllMetrics()
val processTreeMetrics = new Array[Long](names.length)
processTreeMetrics(0) = allMetrics.jvmVmemTotal
@@ -152,14 +140,18 @@ private[spark] object ExecutorMetricType {
ProcessTreeMetrics
)

var definedMetricsAndOffset = mutable.LinkedHashMap.empty[String, Int]
var numberOfMetrics = 0
metricGetters.foreach { m =>
var metricInSet = 0
while (metricInSet < m.names.length) {
definedMetricsAndOffset += (m.names(metricInSet) -> (metricInSet + numberOfMetrics) )
metricInSet += 1

val (metricToOffset, numMetrics) = {
var numberOfMetrics = 0
val definedMetricsAndOffset = mutable.LinkedHashMap.empty[String, Int]
metricGetters.foreach { m =>
var metricInSet = 0
while (metricInSet < m.names.length) {
definedMetricsAndOffset += (m.names(metricInSet) -> (metricInSet + numberOfMetrics))
metricInSet += 1
}
numberOfMetrics += m.names.length
}
numberOfMetrics += m.names.length
(definedMetricsAndOffset, numberOfMetrics)
}
}
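
A tiny standalone sketch of the name-to-offset indexing scheme that metricToOffset and numMetrics implement (the metric names and object below are simplified assumptions, not the PR's code): each metric name maps to a fixed offset into one flat Array[Long] of values.

import scala.collection.mutable

// Sketch: a flat Array[Long] of metric values indexed through a name -> offset map,
// in the spirit of ExecutorMetricType.metricToOffset and ExecutorMetrics.getMetricValue.
object MetricOffsetSketch {
  private val names = Seq("JVMHeapMemory", "JVMOffHeapMemory", "ProcessTreeJVMVMemory")
  val metricToOffset: mutable.LinkedHashMap[String, Int] =
    mutable.LinkedHashMap(names.zipWithIndex: _*)
  val numMetrics: Int = names.length

  def getMetricValue(metrics: Array[Long], metricName: String): Long =
    metrics(metricToOffset(metricName))

  def main(args: Array[String]): Unit = {
    val metrics = new Array[Long](numMetrics)
    metrics(metricToOffset("JVMHeapMemory")) = 42L
    println(getMetricValue(metrics, "JVMHeapMemory")) // prints 42
  }
}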
@@ -133,7 +133,7 @@ private[spark] class ExecutorMetricsJsonSerializer
jsonGenerator: JsonGenerator,
serializerProvider: SerializerProvider): Unit = {
metrics.foreach { m: ExecutorMetrics =>
val metricsMap = ExecutorMetricType.definedMetricsAndOffset.map { case (metric, _) =>
val metricsMap = ExecutorMetricType.metricToOffset.map { case (metric, _) =>
metric -> m.getMetricValue(metric)
}
jsonGenerator.writeObject(metricsMap)
7 changes: 2 additions & 5 deletions core/src/main/scala/org/apache/spark/util/JsonProtocol.scala
@@ -394,12 +394,9 @@ private[spark] object JsonProtocol {

/** Convert executor metrics to JSON. */
def executorMetricsToJson(executorMetrics: ExecutorMetrics): JValue = {
val metrics = for {
(m, _) <- ExecutorMetricType.definedMetricsAndOffset
} yield {
val metrics = ExecutorMetricType.metricToOffset.map { case (m, _) =>
JField(m, executorMetrics.getMetricValue(m))
}

JObject(metrics.toSeq: _*)
}

Expand Down Expand Up @@ -614,7 +611,7 @@ private[spark] object JsonProtocol {
/** Extract the executor metrics from JSON. */
def executorMetricsFromJson(json: JValue): ExecutorMetrics = {
val metrics =
ExecutorMetricType.definedMetricsAndOffset.map { case (metric, _) =>
ExecutorMetricType.metricToOffset.map { case (metric, _) =>
metric -> jsonOption(json \ metric).map(_.extract[Long]).getOrElse(0L)
}
new ExecutorMetrics(metrics.toMap)