Skip to content

Commit c4cebd5

Browse files
ericl authored and rxin committed
[SPARK-16238] Metrics for generated method and class bytecode size
## What changes were proposed in this pull request?

This extends SPARK-15860 to include metrics for the actual bytecode size of janino-generated methods. They can be accessed in the same way as any other codahale metric, e.g.

```
scala> org.apache.spark.metrics.source.CodegenMetrics.METRIC_GENERATED_CLASS_BYTECODE_SIZE.getSnapshot().getValues()
res7: Array[Long] = Array(532, 532, 532, 542, 1479, 2670, 3585, 3585)

scala> org.apache.spark.metrics.source.CodegenMetrics.METRIC_GENERATED_METHOD_BYTECODE_SIZE.getSnapshot().getValues()
res8: Array[Long] = Array(5, 5, 5, 5, 10, 10, 10, 10, 15, 15, 15, 38, 63, 79, 88, 94, 94, 94, 132, 132, 165, 165, 220, 220)
```

## How was this patch tested?

Small unit test, also verified manually that the performance impact is minimal (<10%).

hvanhovell

Author: Eric Liang <ekl@databricks.com>

Closes #13934 from ericl/spark-16238.

(cherry picked from commit 23c5865)
Signed-off-by: Reynold Xin <rxin@databricks.com>
1 parent ef0253f commit c4cebd5

File tree

3 files changed

+55
-1
lines changed

3 files changed

+55
-1
lines changed

core/src/main/scala/org/apache/spark/metrics/source/StaticSources.scala

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,4 +47,16 @@ object CodegenMetrics extends Source {
4747
* Histogram of the time it took to compile source code text (in milliseconds).
4848
*/
4949
val METRIC_COMPILATION_TIME = metricRegistry.histogram(MetricRegistry.name("compilationTime"))
50+
51+
/**
52+
* Histogram of the bytecode size of each class generated by CodeGenerator.
53+
*/
54+
val METRIC_GENERATED_CLASS_BYTECODE_SIZE =
55+
metricRegistry.histogram(MetricRegistry.name("generatedClassSize"))
56+
57+
/**
58+
* Histogram of the bytecode size of each method in classes generated by CodeGenerator.
59+
*/
60+
val METRIC_GENERATED_METHOD_BYTECODE_SIZE =
61+
metricRegistry.histogram(MetricRegistry.name("generatedMethodSize"))
5062
}

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala

Lines changed: 39 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,11 +17,16 @@
1717

1818
package org.apache.spark.sql.catalyst.expressions.codegen
1919

20+
import java.io.ByteArrayInputStream
21+
import java.util.{Map => JavaMap}
22+
23+
import scala.collection.JavaConverters._
2024
import scala.collection.mutable
2125
import scala.collection.mutable.ArrayBuffer
2226

2327
import com.google.common.cache.{CacheBuilder, CacheLoader}
24-
import org.codehaus.janino.ClassBodyEvaluator
28+
import org.codehaus.janino.{ByteArrayClassLoader, ClassBodyEvaluator, SimpleCompiler}
29+
import org.codehaus.janino.util.ClassFile
2530
import scala.language.existentials
2631

2732
import org.apache.spark.SparkEnv
@@ -876,6 +881,7 @@ object CodeGenerator extends Logging {
876881

877882
try {
878883
evaluator.cook("generated.java", code.body)
884+
recordCompilationStats(evaluator)
879885
} catch {
880886
case e: Exception =>
881887
val msg = s"failed to compile: $e\n$formatted"
@@ -885,6 +891,38 @@ object CodeGenerator extends Logging {
885891
evaluator.getClazz().newInstance().asInstanceOf[GeneratedClass]
886892
}
887893

894+
/**
 * Feeds the bytecode size of every class held by the evaluator's class loader, and of every
 * method inside those classes, into the codegen histograms.
 *
 * The class bytes and the method bytecode are both read through reflection on private
 * janino fields.
 *
 * @param evaluator the janino evaluator whose compiled classes are measured
 */
private def recordCompilationStats(evaluator: ClassBodyEvaluator): Unit = {
  // Step 1: reach the class bytes through SimpleCompiler's private `result` class loader
  // and that loader's private `classes` field.
  val loaderField = classOf[SimpleCompiler].getDeclaredField("result")
  loaderField.setAccessible(true)
  val byteLoader = loaderField.get(evaluator).asInstanceOf[ByteArrayClassLoader]
  val bytesField = byteLoader.getClass.getDeclaredField("classes")
  bytesField.setAccessible(true)
  val compiled = bytesField.get(byteLoader).asInstanceOf[JavaMap[String, Array[Byte]]].asScala

  // Step 2: prepare reflective access to the private `code` array of a method's Code
  // attribute.
  val codeAttrClass = Utils.classForName("org.codehaus.janino.util.ClassFile$CodeAttribute")
  val bytecodeField = codeAttrClass.getDeclaredField("code")
  bytecodeField.setAccessible(true)
  val codeAttrName = codeAttrClass.getName

  // Step 3: one class-size sample per class, one method-size sample per Code attribute.
  for (classBytes <- compiled.values) {
    CodegenMetrics.METRIC_GENERATED_CLASS_BYTECODE_SIZE.update(classBytes.length)
    val parsed = new ClassFile(new ByteArrayInputStream(classBytes))
    for {
      method <- parsed.methodInfos.asScala
      attr <- method.getAttributes()
      // Attributes are matched by class name, not by class identity.
      if attr.getClass.getName == codeAttrName
    } {
      val bytecode = bytecodeField.get(attr).asInstanceOf[Array[Byte]]
      CodegenMetrics.METRIC_GENERATED_METHOD_BYTECODE_SIZE.update(bytecode.length)
    }
  }
}
925+
888926
/**
889927
* A cache of generated classes.
890928
*

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,9 +53,13 @@ class CodeGenerationSuite extends SparkFunSuite with ExpressionEvalHelper {
5353
test("metrics are recorded on compile") {
  // Snapshot every histogram's sample count first, so each assertion below measures only
  // what this test's own compilation added.
  val startCount1 = CodegenMetrics.METRIC_COMPILATION_TIME.getCount()
  val startCount2 = CodegenMetrics.METRIC_SOURCE_CODE_SIZE.getCount()
  val startCount3 = CodegenMetrics.METRIC_GENERATED_CLASS_BYTECODE_SIZE.getCount()
  val startCount4 = CodegenMetrics.METRIC_GENERATED_METHOD_BYTECODE_SIZE.getCount()
  GenerateOrdering.generate(Add(Literal(123), Literal(1)).asc :: Nil)
  // Compilation time and source size are expected to gain exactly one sample each.
  assert(CodegenMetrics.METRIC_COMPILATION_TIME.getCount() == startCount1 + 1)
  assert(CodegenMetrics.METRIC_SOURCE_CODE_SIZE.getCount() == startCount2 + 1)
  // The bytecode-size histograms are only required to grow. Each is compared against its
  // own baseline rather than the compilation-time counter.
  assert(CodegenMetrics.METRIC_GENERATED_CLASS_BYTECODE_SIZE.getCount() > startCount3)
  assert(CodegenMetrics.METRIC_GENERATED_METHOD_BYTECODE_SIZE.getCount() > startCount4)
}
6064

6165
test("SPARK-8443: split wide projections into blocks due to JVM code size limit") {

0 commit comments

Comments
 (0)