Skip to content

Commit 136a930

Browse files
committed
fix the failing test
1 parent 338ba11 commit 136a930

File tree

2 files changed

+170
-147
lines changed

2 files changed

+170
-147
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala

Lines changed: 164 additions & 144 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ import scala.collection.mutable
2525
import scala.collection.mutable.ArrayBuffer
2626
import scala.util.{Failure, Random, Success, Try}
2727

28+
import org.apache.spark.internal.Logging
2829
import org.apache.spark.sql.AnalysisException
2930
import org.apache.spark.sql.catalyst._
3031
import org.apache.spark.sql.catalyst.catalog._
@@ -182,6 +183,157 @@ object AnalysisContext {
182183
}
183184
}
184185

186+
object Analyzer extends Logging {
187+
// Functions that may be referenced without parentheses: CURRENT_DATE, CURRENT_TIMESTAMP,
// the CurrentUser function (also under the name "user"), and grouping__id.
// Each entry is (name to match, factory for the expression, alias-name generator).
private val literalFunctions: Seq[(String, () => Expression, Expression => String)] = Seq(
  (CurrentDate().prettyName, () => CurrentDate(), toPrettySQL),
  (CurrentTimestamp().prettyName, () => CurrentTimestamp(), toPrettySQL),
  (CurrentUser().prettyName, () => CurrentUser(), toPrettySQL),
  ("user", () => CurrentUser(), toPrettySQL),
  (
    VirtualColumn.hiveGroupingIdName,
    () => GroupingID(Nil),
    // The grouping id keeps its fixed virtual-column name instead of a pretty-printed SQL alias.
    _ => VirtualColumn.hiveGroupingIdName
  )
)
195+
196+
/**
 * Literal functions do not require the user to specify braces when calling them.
 * When an attribute is not resolvable, we try to resolve it as a literal function.
 *
 * @param nameParts the name parts of the unresolved attribute; only a single-part name can
 *                  match a literal function.
 * @return the literal function expression wrapped in an [[Alias]], or `None` if the name has
 *         more than one part or matches no entry of `literalFunctions`.
 */
private def resolveLiteralFunction(nameParts: Seq[String]): Option[NamedExpression] = {
  nameParts match {
    case Seq(name) =>
      literalFunctions
        .find { case (funcName, _, _) => caseInsensitiveResolution(funcName, name) }
        .map { case (_, makeExpr, aliasNameOf) =>
          val literalExpr = makeExpr()
          Alias(literalExpr, aliasNameOf(literalExpr))()
        }
    // Zero or several name parts can never denote a literal function.
    case _ => None
  }
}
209+
210+
/**
 * Resolves `UnresolvedAttribute`, `GetColumnByOrdinal` and extract value expression(s) by
 * traversing the input expression in top-down manner. It must be top-down because we need to
 * skip over unbound lambda function expression. The lambda expressions are resolved in a
 * different place [[ResolveLambdaVariables]].
 *
 * Example :
 *    SELECT transform(array(1, 2, 3), (x, i) -> x + i)
 *
 * In the case above, x and i are resolved as lambda variables in [[ResolveLambdaVariables]].
 *
 * @param expr the expression to resolve.
 * @param resolveColumnByName looks up a column by its name parts; tried before literal functions.
 * @param getAttrCandidates supplies the attributes used for ordinal-based and view-column lookups.
 * @param resolver name comparison function used for view columns and extract-value fields.
 * @param throws when false, an [[AnalysisException]] raised during resolution is logged at debug
 *               level and the original `expr` is returned; when true the exception propagates.
 */
private def resolveExpression(
    expr: Expression,
    resolveColumnByName: Seq[String] => Option[Expression],
    getAttrCandidates: () => Seq[Attribute],
    resolver: Resolver,
    throws: Boolean): Expression = {
  def innerResolve(e: Expression, isTopLevel: Boolean): Expression = {
    if (e.resolved) {
      // Nothing to do for an already resolved subtree.
      e
    } else {
      e match {
        // Unbound lambdas are left untouched; see the method documentation.
        case lambda: LambdaFunction if !lambda.bound => lambda

        case GetColumnByOrdinal(ordinal, _) =>
          val candidates = getAttrCandidates()
          assert(ordinal >= 0 && ordinal < candidates.length)
          candidates(ordinal)

        case GetViewColumnByNameAndOrdinal(
            viewName, colName, ordinal, expectedNumCandidates, viewDDL) =>
          val sameNamed = getAttrCandidates().filter(attr => resolver(attr.name, colName))
          if (sameNamed.length != expectedNumCandidates) {
            throw QueryCompilationErrors.incompatibleViewSchemaChange(
              viewName, colName, expectedNumCandidates, sameNamed, viewDDL)
          }
          sameNamed(ordinal)

        case u @ UnresolvedAttribute(nameParts) =>
          val result = withPosition(u) {
            resolveColumnByName(nameParts).orElse(resolveLiteralFunction(nameParts)) match {
              // We trim unnecessary alias here. Note that, we cannot trim the alias at top-level,
              // as we should resolve `UnresolvedAttribute` to a named expression. The caller side
              // can trim the top-level alias if it's safe to do so. Since we will call
              // CleanupAliases later in Analyzer, trim non top-level unnecessary alias is safe.
              case Some(Alias(child, _)) if !isTopLevel => child
              case Some(other) => other
              // Neither a column nor a literal function: keep the attribute unresolved.
              case None => u
            }
          }
          logDebug(s"Resolving $u to $result")
          result

        case extract @ UnresolvedExtractValue(child, fieldName) =>
          val resolvedChild = innerResolve(child, isTopLevel = false)
          if (!resolvedChild.resolved) {
            // The field can only be extracted once its child is resolved.
            extract.copy(child = resolvedChild)
          } else {
            withOrigin(extract.origin) {
              ExtractValue(resolvedChild, fieldName, resolver)
            }
          }

        case _ => e.mapChildren(child => innerResolve(child, isTopLevel = false))
      }
    }
  }

  try {
    innerResolve(expr, isTopLevel = true)
  } catch {
    case ae: AnalysisException if !throws =>
      logDebug(ae.getMessage)
      expr
  }
}
283+
284+
/**
 * Resolves `UnresolvedAttribute`, `GetColumnByOrdinal` and extract value expression(s) by the
 * input plan's output attributes. In order to resolve the nested fields correctly, this function
 * makes use of `throws` parameter to control when to raise an AnalysisException.
 *
 * Example :
 *    SELECT * FROM t ORDER BY a.b
 *
 * In the above example, after `a` is resolved to a struct-type column, we may fail to resolve `b`
 * if there is no such nested field named "b". We should not fail and wait for other rules to
 * resolve it if possible.
 *
 * @param expr the expression to resolve.
 * @param plan the plan whose `resolve` method and `output` are used for the lookup.
 * @param resolver the name comparison function.
 * @param throws forwarded to `resolveExpression`; defaults to false.
 * @return the expression returned by `resolveExpression`.
 */
def resolveExpressionByPlanOutput(
    expr: Expression,
    plan: LogicalPlan,
    resolver: Resolver,
    throws: Boolean = false): Expression = {
  // Column lookup goes through the plan itself.
  val lookupInPlan: Seq[String] => Option[Expression] =
    nameParts => plan.resolve(nameParts, resolver)
  // Ordinal-based lookups read the plan's output, evaluated only when requested.
  val planOutput: () => Seq[Attribute] = () => plan.output

  resolveExpression(
    expr,
    resolveColumnByName = lookupInPlan,
    getAttrCandidates = planOutput,
    resolver = resolver,
    throws = throws)
}
310+
311+
/**
 * Resolves `UnresolvedAttribute`, `GetColumnByOrdinal` and extract value expression(s) by the
 * input plan's children output attributes.
 *
 * @param e The expression that needs to be resolved.
 * @param q The LogicalPlan whose children are used to resolve expression's attribute.
 * @param resolver The name comparison function.
 * @return resolved Expression.
 */
def resolveExpressionByPlanChildren(
    e: Expression,
    q: LogicalPlan,
    resolver: Resolver): Expression = {
  val lookupInChildren: Seq[String] => Option[Expression] =
    nameParts => q.resolveChildren(nameParts, resolver)
  // Evaluated lazily: the single-child assertion only fires when candidates are requested.
  val onlyChildOutput: () => Seq[Attribute] = () => {
    assert(q.children.length == 1)
    q.children.head.output
  }

  // Unlike the plan-output variant, analysis errors always propagate here.
  resolveExpression(
    e,
    resolveColumnByName = lookupInChildren,
    getAttrCandidates = onlyChildOutput,
    resolver = resolver,
    throws = true)
}
335+
}
336+
185337
/**
186338
* Provides a logical query plan analyzer, which translates [[UnresolvedAttribute]]s and
187339
* [[UnresolvedRelation]]s into fully typed objects using information in a [[SessionCatalog]].
@@ -230,6 +382,18 @@ class Analyzer(override val catalogManager: CatalogManager)
230382

231383
def resolver: Resolver = conf.resolver
232384

385+
/**
 * Instance-level shortcut for `Analyzer.resolveExpressionByPlanOutput` that passes this
 * analyzer's `resolver`.
 */
private def resolveExpressionByPlanOutput(
    expr: Expression,
    plan: LogicalPlan,
    throws: Boolean = false): Expression =
  Analyzer.resolveExpressionByPlanOutput(
    expr = expr,
    plan = plan,
    resolver = resolver,
    throws = throws)
391+
/**
 * Instance-level shortcut for `Analyzer.resolveExpressionByPlanChildren` that passes this
 * analyzer's `resolver`.
 */
private def resolveExpressionByPlanChildren(
    e: Expression,
    q: LogicalPlan): Expression =
  Analyzer.resolveExpressionByPlanChildren(e = e, q = q, resolver = resolver)
396+
233397
/**
234398
* If the plan cannot be resolved within maxIterations, analyzer will throw exception to inform
235399
* user to increase the value of SQLConf.ANALYZER_MAX_ITERATIONS.
@@ -1767,150 +1931,6 @@ class Analyzer(override val catalogManager: CatalogManager)
17671931
exprs.exists(_.exists(_.isInstanceOf[UnresolvedDeserializer]))
17681932
}
17691933

1770-
// support CURRENT_DATE, CURRENT_TIMESTAMP, and grouping__id
1771-
private val literalFunctions: Seq[(String, () => Expression, Expression => String)] = Seq(
1772-
(CurrentDate().prettyName, () => CurrentDate(), toPrettySQL(_)),
1773-
(CurrentTimestamp().prettyName, () => CurrentTimestamp(), toPrettySQL(_)),
1774-
(CurrentUser().prettyName, () => CurrentUser(), toPrettySQL),
1775-
("user", () => CurrentUser(), toPrettySQL),
1776-
(VirtualColumn.hiveGroupingIdName, () => GroupingID(Nil), _ => VirtualColumn.hiveGroupingIdName)
1777-
)
1778-
1779-
/**
1780-
* Literal functions do not require the user to specify braces when calling them
1781-
* When an attributes is not resolvable, we try to resolve it as a literal function.
1782-
*/
1783-
private def resolveLiteralFunction(nameParts: Seq[String]): Option[NamedExpression] = {
1784-
if (nameParts.length != 1) return None
1785-
val name = nameParts.head
1786-
literalFunctions.find(func => caseInsensitiveResolution(func._1, name)).map {
1787-
case (_, getFuncExpr, getAliasName) =>
1788-
val funcExpr = getFuncExpr()
1789-
Alias(funcExpr, getAliasName(funcExpr))()
1790-
}
1791-
}
1792-
1793-
/**
1794-
* Resolves `UnresolvedAttribute`, `GetColumnByOrdinal` and extract value expressions(s) by
1795-
* traversing the input expression in top-down manner. It must be top-down because we need to
1796-
* skip over unbound lambda function expression. The lambda expressions are resolved in a
1797-
* different place [[ResolveLambdaVariables]].
1798-
*
1799-
* Example :
1800-
* SELECT transform(array(1, 2, 3), (x, i) -> x + i)"
1801-
*
1802-
* In the case above, x and i are resolved as lambda variables in [[ResolveLambdaVariables]].
1803-
*/
1804-
private def resolveExpression(
1805-
expr: Expression,
1806-
resolveColumnByName: Seq[String] => Option[Expression],
1807-
getAttrCandidates: () => Seq[Attribute],
1808-
throws: Boolean): Expression = {
1809-
def innerResolve(e: Expression, isTopLevel: Boolean): Expression = {
1810-
if (e.resolved) return e
1811-
e match {
1812-
case f: LambdaFunction if !f.bound => f
1813-
1814-
case GetColumnByOrdinal(ordinal, _) =>
1815-
val attrCandidates = getAttrCandidates()
1816-
assert(ordinal >= 0 && ordinal < attrCandidates.length)
1817-
attrCandidates(ordinal)
1818-
1819-
case GetViewColumnByNameAndOrdinal(
1820-
viewName, colName, ordinal, expectedNumCandidates, viewDDL) =>
1821-
val attrCandidates = getAttrCandidates()
1822-
val matched = attrCandidates.filter(a => resolver(a.name, colName))
1823-
if (matched.length != expectedNumCandidates) {
1824-
throw QueryCompilationErrors.incompatibleViewSchemaChange(
1825-
viewName, colName, expectedNumCandidates, matched, viewDDL)
1826-
}
1827-
matched(ordinal)
1828-
1829-
case u @ UnresolvedAttribute(nameParts) =>
1830-
val result = withPosition(u) {
1831-
resolveColumnByName(nameParts).orElse(resolveLiteralFunction(nameParts)).map {
1832-
// We trim unnecessary alias here. Note that, we cannot trim the alias at top-level,
1833-
// as we should resolve `UnresolvedAttribute` to a named expression. The caller side
1834-
// can trim the top-level alias if it's safe to do so. Since we will call
1835-
// CleanupAliases later in Analyzer, trim non top-level unnecessary alias is safe.
1836-
case Alias(child, _) if !isTopLevel => child
1837-
case other => other
1838-
}.getOrElse(u)
1839-
}
1840-
logDebug(s"Resolving $u to $result")
1841-
result
1842-
1843-
case u @ UnresolvedExtractValue(child, fieldName) =>
1844-
val newChild = innerResolve(child, isTopLevel = false)
1845-
if (newChild.resolved) {
1846-
withOrigin(u.origin) {
1847-
ExtractValue(newChild, fieldName, resolver)
1848-
}
1849-
} else {
1850-
u.copy(child = newChild)
1851-
}
1852-
1853-
case _ => e.mapChildren(innerResolve(_, isTopLevel = false))
1854-
}
1855-
}
1856-
1857-
try {
1858-
innerResolve(expr, isTopLevel = true)
1859-
} catch {
1860-
case ae: AnalysisException if !throws =>
1861-
logDebug(ae.getMessage)
1862-
expr
1863-
}
1864-
}
1865-
1866-
/**
1867-
* Resolves `UnresolvedAttribute`, `GetColumnByOrdinal` and extract value expressions(s) by the
1868-
* input plan's output attributes. In order to resolve the nested fields correctly, this function
1869-
* makes use of `throws` parameter to control when to raise an AnalysisException.
1870-
*
1871-
* Example :
1872-
* SELECT * FROM t ORDER BY a.b
1873-
*
1874-
* In the above example, after `a` is resolved to a struct-type column, we may fail to resolve `b`
1875-
* if there is no such nested field named "b". We should not fail and wait for other rules to
1876-
* resolve it if possible.
1877-
*/
1878-
def resolveExpressionByPlanOutput(
1879-
expr: Expression,
1880-
plan: LogicalPlan,
1881-
throws: Boolean = false): Expression = {
1882-
resolveExpression(
1883-
expr,
1884-
resolveColumnByName = nameParts => {
1885-
plan.resolve(nameParts, resolver)
1886-
},
1887-
getAttrCandidates = () => plan.output,
1888-
throws = throws)
1889-
}
1890-
1891-
/**
1892-
* Resolves `UnresolvedAttribute`, `GetColumnByOrdinal` and extract value expressions(s) by the
1893-
* input plan's children output attributes.
1894-
*
1895-
* @param e The expression need to be resolved.
1896-
* @param q The LogicalPlan whose children are used to resolve expression's attribute.
1897-
* @return resolved Expression.
1898-
*/
1899-
def resolveExpressionByPlanChildren(
1900-
e: Expression,
1901-
q: LogicalPlan): Expression = {
1902-
resolveExpression(
1903-
e,
1904-
resolveColumnByName = nameParts => {
1905-
q.resolveChildren(nameParts, resolver)
1906-
},
1907-
getAttrCandidates = () => {
1908-
assert(q.children.length == 1)
1909-
q.children.head.output
1910-
},
1911-
throws = true)
1912-
}
1913-
19141934
/**
19151935
* In many dialects of SQL it is valid to use ordinal positions in order/sort by and group by
19161936
* clauses. This rule is to convert ordinal positions to the corresponding expressions in the

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveLateralColumnAlias.scala

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,8 @@ import org.apache.spark.sql.internal.SQLConf
9595
object WrapLateralColumnAliasReference extends Rule[LogicalPlan] {
9696
import ResolveLateralColumnAliasReference.AliasEntry
9797

98+
// Name resolver read from `conf`; passed explicitly to the `Analyzer` object's resolution helpers.
def resolver: Resolver = conf.resolver
99+
98100
private def insertIntoAliasMap(
99101
a: Alias,
100102
idx: Int,
@@ -112,9 +114,10 @@ object WrapLateralColumnAliasReference extends Rule[LogicalPlan] {
112114
*/
113115
private def resolveByLateralAlias(
114116
nameParts: Seq[String], lateralAlias: Alias): Option[LateralColumnAliasReference] = {
115-
val resolvedAttr = SimpleAnalyzer.resolveExpressionByPlanOutput(
117+
val resolvedAttr = Analyzer.resolveExpressionByPlanOutput(
116118
expr = UnresolvedAttribute(nameParts),
117119
plan = LocalRelation(Seq(lateralAlias.toAttribute)),
120+
resolver = resolver,
118121
throws = false
119122
).asInstanceOf[NamedExpression]
120123
if (resolvedAttr.resolved) {
@@ -139,8 +142,8 @@ object WrapLateralColumnAliasReference extends Rule[LogicalPlan] {
139142
aliasMap: CaseInsensitiveMap[Seq[AliasEntry]]): NamedExpression = {
140143
e.transformWithPruning(_.containsAnyPattern(UNRESOLVED_ATTRIBUTE, OUTER_REFERENCE)) {
141144
case u: UnresolvedAttribute if aliasMap.contains(u.nameParts.head) &&
142-
SimpleAnalyzer.resolveExpressionByPlanChildren(
143-
u, currentPlan).isInstanceOf[UnresolvedAttribute] =>
145+
Analyzer.resolveExpressionByPlanChildren(
146+
u, currentPlan, resolver).isInstanceOf[UnresolvedAttribute] =>
144147
val aliases = aliasMap.get(u.nameParts.head).get
145148
aliases.size match {
146149
case n if n > 1 =>

0 commit comments

Comments
 (0)