Applying latest review comments. Using Arrays instead of Map for returning set of metrics.
apache · rezasafi · Jul 26, 2018 · Aug 7, 2018 · Aug 8, 2018 · Sep 25, 2018
commit 7f7ed2bdf5740bd2c4ae8cf2090ba7f016ffb023
diff --git a/core/src/main/scala/org/apache/spark/Heartbeater.scala b/core/src/main/scala/org/apache/spark/Heartbeater.scala
@@ -19,7 +19,6 @@ package org.apache.spark
 
 import java.util.concurrent.TimeUnit
 
-import org.apache.spark.deploy.history.LogInfo
 import org.apache.spark.executor.ExecutorMetrics
 import org.apache.spark.internal.Logging
 import org.apache.spark.memory.MemoryManager
@@ -60,12 +59,14 @@ private[spark] class Heartbeater(
     heartbeater.awaitTermination(10, TimeUnit.SECONDS)
   }
 
-  /** Get the current executor level metrics. These are returned as a Map */
+  /** Get the current executor level metrics. These are returned as an Array */
   def getCurrentMetrics(): ExecutorMetrics = {
-    // figure out how to append all the metrics
-    var metrics = Map.empty[String, Long]
+    val metrics = new Array[Long](ExecutorMetricType.numberOfMetrics)
+    var offset = 0
     ExecutorMetricType.metricGetters.foreach { metric =>
-       metrics ++= metric.getMetricSet(memoryManager)
+      val newSetOfMetrics = metric.getMetricSet(memoryManager)
+      Array.copy(newSetOfMetrics, 0, metrics, offset, newSetOfMetrics.size)
+      offset += newSetOfMetrics.length
     }
     new ExecutorMetrics(metrics)
   }

diff --git a/core/src/main/scala/org/apache/spark/executor/ExecutorMetrics.scala b/core/src/main/scala/org/apache/spark/executor/ExecutorMetrics.scala
@@ -30,14 +30,14 @@ import org.apache.spark.metrics.ExecutorMetricType
 @DeveloperApi
 class ExecutorMetrics private[spark] extends Serializable {
 
-  private val metrics = new Array[Long](ExecutorMetricType.definedMetrics.length)
+  private val metrics = new Array[Long](ExecutorMetricType.numberOfMetrics)
   // the first element is initialized to -1, indicating that the values for the array
   // haven't been set yet.
   metrics(0) = -1
 
   /** Returns the value for the specified metric. */
   def getMetricValue(metricName: String): Long = {
-    metrics(ExecutorMetricType.metricIdxMap(metricName))
+    metrics(ExecutorMetricType.definedMetricsAndOffset.get(metricName).get)
   }
 
   /** Returns true if the values for the metrics have been set, false otherwise. */
@@ -55,8 +55,8 @@ class ExecutorMetrics private[spark] extends Serializable {
    */
   private[spark] def this(executorMetrics: Map[String, Long]) {
     this()
-    (0 until ExecutorMetricType.definedMetrics.length).foreach { idx =>
-      metrics(idx) = executorMetrics.getOrElse(ExecutorMetricType.definedMetrics(idx), 0L)
+    ExecutorMetricType.definedMetricsAndOffset.map { m =>
+      metrics(m._2) = executorMetrics.getOrElse(m._1, 0L)
     }
   }
 
@@ -69,10 +69,10 @@ class ExecutorMetrics private[spark] extends Serializable {
    */
   private[spark] def compareAndUpdatePeakValues(executorMetrics: ExecutorMetrics): Boolean = {
     var updated = false
-    (0 until ExecutorMetricType.definedMetrics.length).foreach { idx =>
-      if (executorMetrics.metrics(idx) > metrics(idx)) {
+    ExecutorMetricType.definedMetricsAndOffset.map {m =>
+      if (executorMetrics.metrics(m._2) > metrics(m._2)) {
         updated = true
-        metrics(idx) = executorMetrics.metrics(idx)
+        metrics(m._2) = executorMetrics.metrics(m._2)
       }
     }
     updated

diff --git a/core/src/main/scala/org/apache/spark/executor/ProcfsBasedSystems.scala b/core/src/main/scala/org/apache/spark/executor/ProcfsBasedSystems.scala
@@ -45,7 +45,7 @@ private[spark] class ProcfsBasedSystems(val procfsDir: String = "/proc/") extend
   var pageSize = computePageSize()
   var isAvailable: Boolean = isProcfsAvailable
   private val pid = computePid()
-  private val ptree = mutable.Map[ Int, Set[Int]]()
+  private var ptree = mutable.Map[ Int, Set[Int]]()
 
   var allMetrics: ProcfsBasedSystemsMetrics = ProcfsBasedSystemsMetrics(0, 0, 0, 0, 0, 0)
 
@@ -84,7 +84,7 @@ private[spark] class ProcfsBasedSystems(val procfsDir: String = "/proc/") extend
       return pid;
     }
     catch {
-      case e: SparkException => logDebug("IO Exception when trying to compute process tree." +
+      case e: SparkException => logWarning("Exception when trying to compute process tree." +
         " As a result reporting of ProcessTree metrics is stopped", e)
         isAvailable = false
         return -1
@@ -95,15 +95,23 @@ private[spark] class ProcfsBasedSystems(val procfsDir: String = "/proc/") extend
     if (testing) {
       return 0;
     }
-    val cmd = Array("getconf", "PAGESIZE")
-    val out2 = Utils.executeAndGetOutput(cmd)
-    return Integer.parseInt(out2.split("\n")(0))
+    try {
+      val cmd = Array("getconf", "PAGESIZE")
+      val out2 = Utils.executeAndGetOutput(cmd)
+      return Integer.parseInt(out2.split("\n")(0))
+    } catch {
+      case e: Exception => logWarning("Exception when trying to compute pagesize, as a" +
+        " result reporting of ProcessTree metrics is stopped")
+        isAvailable = false
+        return 0
+    }
   }
 
   private def computeProcessTree(): Unit = {
     if (!isAvailable || testing) {
       return
     }
+    ptree = mutable.Map[ Int, Set[Int]]()
     val queue = mutable.Queue.empty[Int]
     queue += pid
     while( !queue.isEmpty ) {
@@ -121,34 +129,34 @@ private[spark] class ProcfsBasedSystems(val procfsDir: String = "/proc/") extend
 
   private def getChildPids(pid: Int): ArrayBuffer[Int] = {
     try {
-      val cmd = Array("pgrep", "-P", pid.toString)
+      // val cmd = Array("pgrep", "-P", pid.toString)
       val builder = new ProcessBuilder("pgrep", "-P", pid.toString)
       val process = builder.start()
-      val output = new StringBuilder()
+      // val output = new StringBuilder()
       val threadName = "read stdout for " + "pgrep"
-      def appendToOutput(s: String): Unit = output.append(s).append("\n")
+      val childPidsInInt = mutable.ArrayBuffer.empty[Int]
+      def appendChildPid(s: String): Unit = {
+        if (s != "") {
+          logDebug("Found a child pid:" + s)
+          childPidsInInt += Integer.parseInt(s)
+        }
+      }
       val stdoutThread = Utils.processStreamByLine(threadName,
-        process.getInputStream, appendToOutput)
+        process.getInputStream, appendChildPid)
       val exitCode = process.waitFor()
       stdoutThread.join()
       // pgrep will have exit code of 1 if there are more than one child process
       // and it will have a exit code of 2 if there is no child process
       if (exitCode != 0 && exitCode > 2) {
-        logError(s"Process $cmd exited with code $exitCode: $output")
+        val cmd = builder.command().toArray.mkString(" ")
+        logWarning(s"Process $cmd" +
+          s" exited with code $exitCode, with stderr:" + s"${process.getErrorStream} ")
         throw new SparkException(s"Process $cmd exited with code $exitCode")
       }
-      val childPids = output.toString.split("\n")
-      val childPidsInInt = mutable.ArrayBuffer.empty[Int]
-      for (p <- childPids) {
-        if (p != "") {
-          logDebug("Found a child pid: " + p)
-          childPidsInInt += Integer.parseInt(p)
-        }
-      }
       childPidsInInt
     } catch {
-      case e: IOException => logDebug("IO Exception when trying to compute process tree." +
-        " As a result reporting of ProcessTree metrics is stopped", e)
+      case e: Exception => logWarning("Exception when trying to compute process tree." +
+        " As a result reporting of ProcessTree metrics is stopped.", e)
         isAvailable = false
         return mutable.ArrayBuffer.empty[Int]
     }
@@ -173,54 +181,42 @@ private[spark] class ProcfsBasedSystems(val procfsDir: String = "/proc/") extend
             val vmem = procInfoSplit(22).toLong
             val rssPages = procInfoSplit(23).toLong
             if (procInfoSplit(1).toLowerCase(Locale.US).contains("java")) {
-              allMetrics = ProcfsBasedSystemsMetrics(
-                allMetrics.jvmVmemTotal + vmem,
-                allMetrics.jvmRSSTotal + (rssPages*pageSize),
-                allMetrics.pythonVmemTotal,
-                allMetrics.pythonRSSTotal,
-                allMetrics.otherVmemTotal,
-                allMetrics.otherRSSTotal
+              allMetrics = allMetrics.copy(
+                jvmVmemTotal = allMetrics.jvmVmemTotal + vmem,
+                jvmRSSTotal = allMetrics.jvmRSSTotal + (rssPages*pageSize)
               )
             }
             else if (procInfoSplit(1).toLowerCase(Locale.US).contains("python")) {
-              allMetrics = ProcfsBasedSystemsMetrics(
-                allMetrics.jvmVmemTotal,
-                allMetrics.jvmRSSTotal,
-                allMetrics.pythonVmemTotal + vmem,
-                allMetrics.pythonRSSTotal + (rssPages*pageSize),
-                allMetrics.otherVmemTotal,
-                allMetrics.otherRSSTotal
+              allMetrics = allMetrics.copy(
+                pythonVmemTotal = allMetrics.pythonVmemTotal + vmem,
+                pythonRSSTotal = allMetrics.pythonRSSTotal + (rssPages*pageSize)
               )
             }
             else {
-              allMetrics = ProcfsBasedSystemsMetrics(
-                allMetrics.jvmVmemTotal,
-                allMetrics.jvmRSSTotal,
-                allMetrics.pythonVmemTotal,
-                allMetrics.pythonRSSTotal,
-                allMetrics.otherVmemTotal + vmem,
-                allMetrics.otherRSSTotal + (rssPages*pageSize)
+              allMetrics = allMetrics.copy(
+                otherVmemTotal = allMetrics.otherVmemTotal + vmem,
+                otherRSSTotal = allMetrics.otherRSSTotal + (rssPages*pageSize)
               )
             }
           }
         }
       }
     } catch {
-      case f: FileNotFoundException => logDebug("There was a problem with reading" +
-        " the stat file of the process", f)
+      case f: FileNotFoundException => logWarning("There was a problem with reading" +
+        " the stat file of the process. ", f)
     }
   }
 
-  private[spark] def computeAllMetrics(): Unit = {
+  private[spark] def computeAllMetrics(): ProcfsBasedSystemsMetrics = {
     if (!isAvailable) {
-      allMetrics = ProcfsBasedSystemsMetrics(0, 0, 0, 0, 0, 0)
-      return
+      return ProcfsBasedSystemsMetrics(0, 0, 0, 0, 0, 0)
     }
     computeProcessTree
     val pids = ptree.keySet
     allMetrics = ProcfsBasedSystemsMetrics(0, 0, 0, 0, 0, 0)
     for (p <- pids) {
       computeProcessInfo(p)
     }
+    return allMetrics
   }
 }
diff --git a/core/src/main/scala/org/apache/spark/metrics/ExecutorMetricType.scala b/core/src/main/scala/org/apache/spark/metrics/ExecutorMetricType.scala
@@ -27,16 +27,19 @@ import org.apache.spark.memory.MemoryManager
  */
 sealed trait ExecutorMetricType {
   private[spark] def getMetricValue(memoryManager: MemoryManager): Long = 0
-  private[spark] def getMetricSet(memoryManager: MemoryManager): Map[String, Long] =
-    Map.empty[ String, Long]
-  private[spark] val name = getClass().getName().stripSuffix("$").split("""\.""").last
+  private[spark] def getMetricSet(memoryManager: MemoryManager): Array[Long] = {
+    new Array[Long](0)
+  }
+  private[spark] def names = Seq(getClass().getName().stripSuffix("$").split("""\.""").last)
 }
 
 private[spark] abstract class MemoryManagerExecutorMetricType(
     f: MemoryManager => Long) extends ExecutorMetricType {
-  override private[spark] def getMetricSet(memoryManager: MemoryManager): Map[String, Long] = {
-    var metricAsSet = Map.empty[String, Long]
-    metricAsSet += (name -> f(memoryManager))
+  override private[spark] def getMetricSet(memoryManager: MemoryManager): Array[Long] = {
+    val metricAsSet = new Array[Long](names.length)
+    (0 until names.length ).foreach { idx =>
+      metricAsSet(idx) = (f(memoryManager))
+    }
     metricAsSet
   }
   override private[spark] def getMetricValue(memoryManager: MemoryManager): Long = {
@@ -50,15 +53,22 @@ private[spark] abstract class MBeanExecutorMetricType(mBeanName: String)
     ManagementFactory.getPlatformMBeanServer,
     new ObjectName(mBeanName).toString, classOf[BufferPoolMXBean])
 
+  override private[spark] def getMetricSet(memoryManager: MemoryManager): Array[Long] = {
+    val metricAsSet = new Array[Long](1)
+    metricAsSet(0) = bean.getMemoryUsed
+    metricAsSet
+  }
+
   override private[spark] def getMetricValue(memoryManager: MemoryManager): Long = {
     bean.getMemoryUsed
   }
 }
 
 case object JVMHeapMemory extends ExecutorMetricType {
-  override private[spark] def getMetricSet(memoryManager: MemoryManager): Map[String, Long] = {
-    var metricAsSet = Map.empty[String, Long]
-    metricAsSet += (name -> ManagementFactory.getMemoryMXBean.getHeapMemoryUsage().getUsed())
+
+  override private[spark] def getMetricSet(memoryManager: MemoryManager): Array[Long] = {
+    val metricAsSet = new Array[Long](1)
+    metricAsSet(0) = ( ManagementFactory.getMemoryMXBean.getHeapMemoryUsage().getUsed())
     metricAsSet
   }
   override private[spark] def getMetricValue(memoryManager: MemoryManager): Long = {
@@ -67,9 +77,9 @@ case object JVMHeapMemory extends ExecutorMetricType {
 }
 
 case object JVMOffHeapMemory extends ExecutorMetricType {
-  override private[spark] def getMetricSet(memoryManager: MemoryManager): Map[String, Long] = {
-    var metricAsSet = Map.empty[String, Long]
-    metricAsSet += (name -> ManagementFactory.getMemoryMXBean.getNonHeapMemoryUsage().getUsed())
+  override private[spark] def getMetricSet(memoryManager: MemoryManager): Array[Long] = {
+    val metricAsSet = new Array[ Long](1)
+    metricAsSet(0) = ( ManagementFactory.getMemoryMXBean.getNonHeapMemoryUsage().getUsed())
     metricAsSet
   }
   override private[spark] def getMetricValue(memoryManager: MemoryManager): Long = {
@@ -78,21 +88,22 @@ case object JVMOffHeapMemory extends ExecutorMetricType {
 }
 
 case object ProcessTreeMetrics extends ExecutorMetricType {
-  override private[spark] def getMetricSet(memoryManager: MemoryManager): Map[String, Long] = {
-    ExecutorMetricType.pTreeInfo.computeAllMetrics()
-    var processTreeMetrics = Map.empty[String, Long]
-    processTreeMetrics += ("ProcessTreeJVMVMemory" ->
-      ExecutorMetricType.pTreeInfo.allMetrics.jvmVmemTotal )
-    processTreeMetrics += ("ProcessTreeJVMRSSMemory" ->
-      ExecutorMetricType.pTreeInfo.allMetrics.jvmRSSTotal )
-    processTreeMetrics += ("ProcessTreePythonVMemory" ->
-      ExecutorMetricType.pTreeInfo.allMetrics.pythonVmemTotal )
-    processTreeMetrics += ("ProcessTreePythonRSSMemory" ->
-      ExecutorMetricType.pTreeInfo.allMetrics.pythonRSSTotal )
-    processTreeMetrics += ("ProcessTreeOtherVMemory" ->
-      ExecutorMetricType.pTreeInfo.allMetrics.otherVmemTotal )
-    processTreeMetrics += ("ProcessTreeOtherRSSMemory" ->
-      ExecutorMetricType.pTreeInfo.allMetrics.otherRSSTotal )
+  override val names = Seq(
+    "ProcessTreeJVMVMemory",
+    "ProcessTreeJVMRSSMemory",
+    "ProcessTreePythonVMemory",
+    "ProcessTreePythonRSSMemory",
+    "ProcessTreeOtherVMemory",
+    "ProcessTreeOtherRSSMemory")
+  override private[spark] def getMetricSet(memoryManager: MemoryManager): Array[Long] = {
+    val allMetrics = ExecutorMetricType.pTreeInfo.computeAllMetrics()
+    val processTreeMetrics = new Array[Long](names.length)
+    processTreeMetrics(0) = allMetrics.jvmVmemTotal
+    processTreeMetrics(1) = allMetrics.jvmRSSTotal
+    processTreeMetrics(2) = allMetrics.pythonVmemTotal
+    processTreeMetrics(3) = allMetrics.pythonRSSTotal
+    processTreeMetrics(4) = allMetrics.otherVmemTotal
+    processTreeMetrics(5) = allMetrics.otherRSSTotal
     processTreeMetrics
   }
 }
@@ -138,26 +149,15 @@ private[spark] object ExecutorMetricType {
     MappedPoolMemory,
     ProcessTreeMetrics
   )
- // List of defined metrics
-  val definedMetrics = IndexedSeq(
-    "JVMHeapMemory",
-    "JVMOffHeapMemory",
-    "OnHeapExecutionMemory",
-    "OffHeapExecutionMemory",
-    "OnHeapStorageMemory",
-    "OffHeapStorageMemory",
-    "OnHeapUnifiedMemory",
-    "OffHeapUnifiedMemory",
-    "DirectPoolMemory",
-    "MappedPoolMemory",
-    "ProcessTreeJVMVMemory",
-    "ProcessTreeJVMRSSMemory",
-    "ProcessTreePythonVMemory",
-    "ProcessTreePythonRSSMemory",
-    "ProcessTreeOtherVMemory",
-    "ProcessTreeOtherRSSMemory"
-  )
 
-  val metricIdxMap =
-    Map[String, Int](ExecutorMetricType.definedMetrics.zipWithIndex: _*)
+  var definedMetricsAndOffset = Map.empty[String, Int]
+  var numberOfMetrics = 0
+  metricGetters.foreach { m =>
+    var metricInSet = 0
+    while (metricInSet < m.names.length) {
+      definedMetricsAndOffset += (m.names(metricInSet) -> (metricInSet + numberOfMetrics) )
+      metricInSet += 1
+    }
+    numberOfMetrics += m.names.length
+  }
 }
diff --git a/core/src/main/scala/org/apache/spark/status/api/v1/api.scala b/core/src/main/scala/org/apache/spark/status/api/v1/api.scala
@@ -133,9 +133,9 @@ private[spark] class ExecutorMetricsJsonSerializer
       jsonGenerator: JsonGenerator,
       serializerProvider: SerializerProvider): Unit = {
     metrics.foreach { m: ExecutorMetrics =>
-      val metricsMap = ExecutorMetricType.definedMetrics.map { metricType =>
-        metricType -> m.getMetricValue(metricType)
-      }.toMap
+      val metricsMap = ExecutorMetricType.definedMetricsAndOffset.map { case (metric, _) =>
+        metric -> m.getMetricValue(metric)
+      }
       jsonGenerator.writeObject(metricsMap)
     }
   }