apache · kiszk · Oct 6, 2017 · Jul 16, 2018 · Jul 17, 2018 · Jul 31, 2018
diff --git a/...lyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/...lyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
@@ -1177,8 +1177,7 @@ class CodegenContext {
     // be extremely expensive in certain cases, such as deeply-nested expressions which operate over
     // inputs with wide schemas. For more details on the performance issues that motivated this
     // flag, see SPARK-15680.
-    if (force ||
-      SparkEnv.get != null && SparkEnv.get.conf.getBoolean("spark.sql.codegen.comments", false)) {
+    if (force || SQLConf.get.codegenComments) {
       val name = if (placeholderId != "") {
         assert(!placeHolderToComments.contains(placeholderId))
         placeholderId

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
@@ -751,6 +751,12 @@ object SQLConf {
     .booleanConf
     .createWithDefault(true)
 
+  // Internal integer conf (default 20), read through SQLConf.maxCaseBranchesForCodegen.
+  // NOTE(review): the doc text says "switches" while the key says "case branches" -- presumably
+  // both name the same limit; confirm and align the wording.
+  val MAX_CASES_BRANCHES = buildConf("spark.sql.codegen.maxCaseBranches")
+    .internal()
+    .doc("The maximum number of switches supported with codegen.")
+    .intConf
+    .createWithDefault(20)
+
   val CODEGEN_LOGGING_MAX_LINES = buildConf("spark.sql.codegen.logging.maxLines")
     .internal()
     .doc("The maximum number of codegen lines to log when errors occur. Use -1 for unlimited.")
@@ -1539,6 +1545,10 @@ class SQLConf extends Serializable with Logging {
 
   def codegenFallback: Boolean = getConf(CODEGEN_FALLBACK)
 
+  // Current value of the internal conf MAX_CASES_BRANCHES ("spark.sql.codegen.maxCaseBranches").
+  def maxCaseBranchesForCodegen: Int = getConf(MAX_CASES_BRANCHES)
+
+  // Whether comments are put into the generated code; backed by the static conf
+  // StaticSQLConf.CODEGEN_COMMENTS ("spark.sql.codegen.comments").
+  def codegenComments: Boolean = getConf(StaticSQLConf.CODEGEN_COMMENTS)
+
   def loggingMaxLinesForCodegen: Int = getConf(CODEGEN_LOGGING_MAX_LINES)
 
   def hugeMethodLimit: Int = getConf(WHOLESTAGE_HUGE_METHOD_LIMIT)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/StaticSQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/StaticSQLConf.scala
@@ -74,6 +74,13 @@ object StaticSQLConf {
       .checkValue(maxEntries => maxEntries >= 0, "The maximum must not be negative")
       .createWithDefault(100)
 
+  // Static, internal boolean conf (default false) controlling whether comments are put into
+  // the generated code. Exposed to callers as SQLConf.codegenComments.
+  val CODEGEN_COMMENTS = buildStaticConf("spark.sql.codegen.comments")
+    .internal()
+    .doc("When true, put comment in the generated code. Since computing huge comments " +
+      "can be extremely expensive in certain cases, default is false.")
+    .booleanConf
+    .createWithDefault(false)
+
   // When enabling the debug, Spark SQL internal table properties are not filtered out; however,
   // some related DDL commands (e.g., ANALYZE TABLE and CREATE TABLE LIKE) might not work properly.
   val DEBUG_MODE = buildStaticConf("spark.sql.debug")

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/internal/ExecutorSideSQLConfSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/internal/ExecutorSideSQLConfSuite.scala
@@ -19,6 +19,8 @@ package org.apache.spark.sql.internal
 
 import org.apache.spark.SparkFunSuite
 import org.apache.spark.sql.{AnalysisException, SparkSession}
+import org.apache.spark.sql.execution.debug.codegenStringSeq
+import org.apache.spark.sql.functions.col
 import org.apache.spark.sql.test.SQLTestUtils
 
 class ExecutorSideSQLConfSuite extends SparkFunSuite with SQLTestUtils {
@@ -82,4 +84,22 @@ class ExecutorSideSQLConfSuite extends SparkFunSuite with SQLTestUtils {
       assert(checks.forall(_ == true))
     }
   }
+
+  // Checks that the two comment markers show up in the generated code exactly when
+  // StaticSQLConf.CODEGEN_COMMENTS is set to "true".
+  test("SPARK-22219: refactor to control to generate comment") {
+    // Comments disabled first, then enabled.
+    Seq(false, true).foreach { commentsEnabled =>
+      withSQLConf(StaticSQLConf.CODEGEN_COMMENTS.key -> commentsEnabled.toString) {
+        val plan = spark.range(10).groupBy(col("id") * 2).count().queryExecution.executedPlan
+        val generated = codegenStringSeq(plan)
+        assert(generated.length == 2)
+        assert(generated.forall { case (_, code) =>
+          // Every generated unit must agree with the flag for both markers.
+          (code.contains("* Codegend pipeline") == commentsEnabled) &&
+            (code.contains("// input[") == commentsEnabled)
+        })
+      }
+    }
+  }
 }