Skip to content

Commit 45ed712

Browse files
cxzl25 authored and HyukjinKwon committed
[SPARK-31590][SQL] Metadata-only queries should not include subquery in partition filters
### What changes were proposed in this pull request?

Metadata-only queries should not include subqueries in partition filters.

### Why are the changes needed?

When the `OptimizeMetadataOnlyQuery` rule is applied again to such a query, it fails with the exception `Cannot evaluate expression: scalar-subquery`.

### Does this PR introduce any user-facing change?

Yes. When `spark.sql.optimizer.metadataOnly` is enabled, queries that include a subquery in partition filters now succeed.

### How was this patch tested?

Added a unit test.

Closes #28383 from cxzl25/fix_SPARK-31590.

Authored-by: sychen <[email protected]>
Signed-off-by: HyukjinKwon <[email protected]>
(cherry picked from commit 588966d)
Signed-off-by: HyukjinKwon <[email protected]>
1 parent b763c23 commit 45ed712

File tree

2 files changed

+15
-1
lines changed

2 files changed

+15
-1
lines changed

sql/core/src/main/scala/org/apache/spark/sql/execution/OptimizeMetadataOnlyQuery.scala

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -117,7 +117,7 @@ case class OptimizeMetadataOnlyQuery(catalog: SessionCatalog) extends Rule[Logic
117117
case a: AttributeReference =>
118118
a.withName(relation.output.find(_.semanticEquals(a)).get.name)
119119
}
120-
}
120+
}.filterNot(SubqueryExpression.hasSubquery)
121121

122122
child transform {
123123
case plan if plan eq relation =>

sql/core/src/test/scala/org/apache/spark/sql/execution/OptimizeMetadataOnlyQuerySuite.scala

Lines changed: 14 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -103,6 +103,20 @@ class OptimizeMetadataOnlyQuerySuite extends QueryTest with SharedSparkSession {
103103
"select partcol2, min(partcol1) from srcpart where partcol1 = 0 group by partcol2",
104104
"select max(c1) from (select partcol1 + 1 as c1 from srcpart where partcol1 = 0) t")
105105

106+
testMetadataOnly(
107+
"SPARK-31590 Metadata-only queries should not include subquery in partition filters",
108+
"""
109+
|SELECT partcol1, MAX(partcol2) AS partcol2
110+
|FROM srcpart
111+
|WHERE partcol1 = (
112+
| SELECT MAX(partcol1)
113+
| FROM srcpart
114+
|)
115+
|AND partcol2 = 'even'
116+
|GROUP BY partcol1
117+
|""".stripMargin
118+
)
119+
106120
testNotMetadataOnly(
107121
"Don't optimize metadata only query for non-partition columns",
108122
"select col1 from srcpart group by col1",

0 commit comments

Comments
 (0)