@@ -25,6 +25,7 @@ import scala.collection.mutable
2525import scala .collection .mutable .ArrayBuffer
2626import scala .util .{Failure , Random , Success , Try }
2727
28+ import org .apache .spark .internal .Logging
2829import org .apache .spark .sql .AnalysisException
2930import org .apache .spark .sql .catalyst ._
3031import org .apache .spark .sql .catalyst .catalog ._
@@ -182,6 +183,157 @@ object AnalysisContext {
182183 }
183184}
184185
186+ object Analyzer extends Logging {
187+ // support CURRENT_DATE, CURRENT_TIMESTAMP, CURRENT_USER, USER, and grouping__id
188+ private val literalFunctions : Seq [(String , () => Expression , Expression => String )] = Seq (
189+ (CurrentDate ().prettyName, () => CurrentDate (), toPrettySQL(_)),
190+ (CurrentTimestamp ().prettyName, () => CurrentTimestamp (), toPrettySQL(_)),
191+ (CurrentUser ().prettyName, () => CurrentUser (), toPrettySQL),
192+ (" user" , () => CurrentUser (), toPrettySQL),
193+ (VirtualColumn .hiveGroupingIdName, () => GroupingID (Nil ), _ => VirtualColumn .hiveGroupingIdName)
194+ )
195+
196+ /**
197+ * Literal functions do not require the user to specify parentheses when calling them.
198+ * When an attribute is not resolvable, we try to resolve it as a literal function.
199+ */
200+ private def resolveLiteralFunction (nameParts : Seq [String ]): Option [NamedExpression ] = {
201+ if (nameParts.length != 1 ) return None
202+ val name = nameParts.head
203+ literalFunctions.find(func => caseInsensitiveResolution(func._1, name)).map {
204+ case (_, getFuncExpr, getAliasName) =>
205+ val funcExpr = getFuncExpr()
206+ Alias (funcExpr, getAliasName(funcExpr))()
207+ }
208+ }
209+
210+ /**
211+ * Resolves `UnresolvedAttribute`, `GetColumnByOrdinal` and extract value expression(s) by
212+ * traversing the input expression in a top-down manner. It must be top-down because we need to
213+ * skip over unbound lambda function expressions. The lambda expressions are resolved in a
214+ * different place: [[ResolveLambdaVariables]].
215+ *
216+ * Example:
217+ * SELECT transform(array(1, 2, 3), (x, i) -> x + i)
218+ *
219+ * In the case above, x and i are resolved as lambda variables in [[ResolveLambdaVariables]].
220+ */
221+ private def resolveExpression (
222+ expr : Expression ,
223+ resolveColumnByName : Seq [String ] => Option [Expression ],
224+ getAttrCandidates : () => Seq [Attribute ],
225+ resolver : Resolver ,
226+ throws : Boolean ): Expression = {
227+ def innerResolve (e : Expression , isTopLevel : Boolean ): Expression = {
228+ if (e.resolved) return e
229+ e match {
230+ case f : LambdaFunction if ! f.bound => f
231+
232+ case GetColumnByOrdinal (ordinal, _) =>
233+ val attrCandidates = getAttrCandidates()
234+ assert(ordinal >= 0 && ordinal < attrCandidates.length)
235+ attrCandidates(ordinal)
236+
237+ case GetViewColumnByNameAndOrdinal (
238+ viewName, colName, ordinal, expectedNumCandidates, viewDDL) =>
239+ val attrCandidates = getAttrCandidates()
240+ val matched = attrCandidates.filter(a => resolver(a.name, colName))
241+ if (matched.length != expectedNumCandidates) {
242+ throw QueryCompilationErrors .incompatibleViewSchemaChange(
243+ viewName, colName, expectedNumCandidates, matched, viewDDL)
244+ }
245+ matched(ordinal)
246+
247+ case u @ UnresolvedAttribute (nameParts) =>
248+ val result = withPosition(u) {
249+ resolveColumnByName(nameParts).orElse(resolveLiteralFunction(nameParts)).map {
250+ // We trim unnecessary alias here. Note that, we cannot trim the alias at top-level,
251+ // as we should resolve `UnresolvedAttribute` to a named expression. The caller side
252+ // can trim the top-level alias if it's safe to do so. Since we will call
253+ // CleanupAliases later in Analyzer, trim non top-level unnecessary alias is safe.
254+ case Alias (child, _) if ! isTopLevel => child
255+ case other => other
256+ }.getOrElse(u)
257+ }
258+ logDebug(s " Resolving $u to $result" )
259+ result
260+
261+ case u @ UnresolvedExtractValue (child, fieldName) =>
262+ val newChild = innerResolve(child, isTopLevel = false )
263+ if (newChild.resolved) {
264+ withOrigin(u.origin) {
265+ ExtractValue (newChild, fieldName, resolver)
266+ }
267+ } else {
268+ u.copy(child = newChild)
269+ }
270+
271+ case _ => e.mapChildren(innerResolve(_, isTopLevel = false ))
272+ }
273+ }
274+
275+ try {
276+ innerResolve(expr, isTopLevel = true )
277+ } catch {
278+ case ae : AnalysisException if ! throws =>
279+ logDebug(ae.getMessage)
280+ expr
281+ }
282+ }
283+
284+ /**
285+ * Resolves `UnresolvedAttribute`, `GetColumnByOrdinal` and extract value expression(s) by the
286+ * input plan's output attributes. In order to resolve the nested fields correctly, this function
287+ * makes use of the `throws` parameter to control when to raise an AnalysisException.
288+ *
289+ * Example:
290+ * SELECT * FROM t ORDER BY a.b
291+ *
292+ * In the above example, after `a` is resolved to a struct-type column, we may fail to resolve `b`
293+ * if there is no such nested field named "b". Instead of failing, we should wait for other rules
294+ * to resolve it if possible.
295+ */
296+ def resolveExpressionByPlanOutput (
297+ expr : Expression ,
298+ plan : LogicalPlan ,
299+ resolver : Resolver ,
300+ throws : Boolean = false ): Expression = {
301+ resolveExpression(
302+ expr,
303+ resolveColumnByName = nameParts => {
304+ plan.resolve(nameParts, resolver)
305+ },
306+ getAttrCandidates = () => plan.output,
307+ resolver = resolver,
308+ throws = throws)
309+ }
310+
311+ /**
312+ * Resolves `UnresolvedAttribute`, `GetColumnByOrdinal` and extract value expression(s) by the
313+ * input plan's children output attributes.
314+ *
315+ * @param e The expression that needs to be resolved.
316+ * @param q The LogicalPlan whose children are used to resolve the expression's attributes.
317+ * @return the resolved Expression.
318+ */
319+ def resolveExpressionByPlanChildren (
320+ e : Expression ,
321+ q : LogicalPlan ,
322+ resolver : Resolver ): Expression = {
323+ resolveExpression(
324+ e,
325+ resolveColumnByName = nameParts => {
326+ q.resolveChildren(nameParts, resolver)
327+ },
328+ getAttrCandidates = () => {
329+ assert(q.children.length == 1 )
330+ q.children.head.output
331+ },
332+ resolver = resolver,
333+ throws = true )
334+ }
335+ }
336+
185337/**
186338 * Provides a logical query plan analyzer, which translates [[UnresolvedAttribute ]]s and
187339 * [[UnresolvedRelation ]]s into fully typed objects using information in a [[SessionCatalog ]].
@@ -230,6 +382,18 @@ class Analyzer(override val catalogManager: CatalogManager)
230382
231383 def resolver : Resolver = conf.resolver
232384
385+ private def resolveExpressionByPlanOutput (
386+ expr : Expression ,
387+ plan : LogicalPlan ,
388+ throws : Boolean = false ): Expression = {
389+ Analyzer .resolveExpressionByPlanOutput(expr, plan, resolver, throws)
390+ }
391+ private def resolveExpressionByPlanChildren (
392+ e : Expression ,
393+ q : LogicalPlan ): Expression = {
394+ Analyzer .resolveExpressionByPlanChildren(e, q, resolver)
395+ }
396+
233397 /**
234398 * If the plan cannot be resolved within maxIterations, analyzer will throw exception to inform
235399 * user to increase the value of SQLConf.ANALYZER_MAX_ITERATIONS.
@@ -1767,150 +1931,6 @@ class Analyzer(override val catalogManager: CatalogManager)
17671931 exprs.exists(_.exists(_.isInstanceOf [UnresolvedDeserializer ]))
17681932 }
17691933
1770- // support CURRENT_DATE, CURRENT_TIMESTAMP, and grouping__id
1771- private val literalFunctions : Seq [(String , () => Expression , Expression => String )] = Seq (
1772- (CurrentDate ().prettyName, () => CurrentDate (), toPrettySQL(_)),
1773- (CurrentTimestamp ().prettyName, () => CurrentTimestamp (), toPrettySQL(_)),
1774- (CurrentUser ().prettyName, () => CurrentUser (), toPrettySQL),
1775- (" user" , () => CurrentUser (), toPrettySQL),
1776- (VirtualColumn .hiveGroupingIdName, () => GroupingID (Nil ), _ => VirtualColumn .hiveGroupingIdName)
1777- )
1778-
1779- /**
1780- * Literal functions do not require the user to specify braces when calling them
1781- * When an attributes is not resolvable, we try to resolve it as a literal function.
1782- */
1783- private def resolveLiteralFunction (nameParts : Seq [String ]): Option [NamedExpression ] = {
1784- if (nameParts.length != 1 ) return None
1785- val name = nameParts.head
1786- literalFunctions.find(func => caseInsensitiveResolution(func._1, name)).map {
1787- case (_, getFuncExpr, getAliasName) =>
1788- val funcExpr = getFuncExpr()
1789- Alias (funcExpr, getAliasName(funcExpr))()
1790- }
1791- }
1792-
1793- /**
1794- * Resolves `UnresolvedAttribute`, `GetColumnByOrdinal` and extract value expressions(s) by
1795- * traversing the input expression in top-down manner. It must be top-down because we need to
1796- * skip over unbound lambda function expression. The lambda expressions are resolved in a
1797- * different place [[ResolveLambdaVariables ]].
1798- *
1799- * Example :
1800- * SELECT transform(array(1, 2, 3), (x, i) -> x + i)"
1801- *
1802- * In the case above, x and i are resolved as lambda variables in [[ResolveLambdaVariables ]].
1803- */
1804- private def resolveExpression (
1805- expr : Expression ,
1806- resolveColumnByName : Seq [String ] => Option [Expression ],
1807- getAttrCandidates : () => Seq [Attribute ],
1808- throws : Boolean ): Expression = {
1809- def innerResolve (e : Expression , isTopLevel : Boolean ): Expression = {
1810- if (e.resolved) return e
1811- e match {
1812- case f : LambdaFunction if ! f.bound => f
1813-
1814- case GetColumnByOrdinal (ordinal, _) =>
1815- val attrCandidates = getAttrCandidates()
1816- assert(ordinal >= 0 && ordinal < attrCandidates.length)
1817- attrCandidates(ordinal)
1818-
1819- case GetViewColumnByNameAndOrdinal (
1820- viewName, colName, ordinal, expectedNumCandidates, viewDDL) =>
1821- val attrCandidates = getAttrCandidates()
1822- val matched = attrCandidates.filter(a => resolver(a.name, colName))
1823- if (matched.length != expectedNumCandidates) {
1824- throw QueryCompilationErrors .incompatibleViewSchemaChange(
1825- viewName, colName, expectedNumCandidates, matched, viewDDL)
1826- }
1827- matched(ordinal)
1828-
1829- case u @ UnresolvedAttribute (nameParts) =>
1830- val result = withPosition(u) {
1831- resolveColumnByName(nameParts).orElse(resolveLiteralFunction(nameParts)).map {
1832- // We trim unnecessary alias here. Note that, we cannot trim the alias at top-level,
1833- // as we should resolve `UnresolvedAttribute` to a named expression. The caller side
1834- // can trim the top-level alias if it's safe to do so. Since we will call
1835- // CleanupAliases later in Analyzer, trim non top-level unnecessary alias is safe.
1836- case Alias (child, _) if ! isTopLevel => child
1837- case other => other
1838- }.getOrElse(u)
1839- }
1840- logDebug(s " Resolving $u to $result" )
1841- result
1842-
1843- case u @ UnresolvedExtractValue (child, fieldName) =>
1844- val newChild = innerResolve(child, isTopLevel = false )
1845- if (newChild.resolved) {
1846- withOrigin(u.origin) {
1847- ExtractValue (newChild, fieldName, resolver)
1848- }
1849- } else {
1850- u.copy(child = newChild)
1851- }
1852-
1853- case _ => e.mapChildren(innerResolve(_, isTopLevel = false ))
1854- }
1855- }
1856-
1857- try {
1858- innerResolve(expr, isTopLevel = true )
1859- } catch {
1860- case ae : AnalysisException if ! throws =>
1861- logDebug(ae.getMessage)
1862- expr
1863- }
1864- }
1865-
1866- /**
1867- * Resolves `UnresolvedAttribute`, `GetColumnByOrdinal` and extract value expressions(s) by the
1868- * input plan's output attributes. In order to resolve the nested fields correctly, this function
1869- * makes use of `throws` parameter to control when to raise an AnalysisException.
1870- *
1871- * Example :
1872- * SELECT * FROM t ORDER BY a.b
1873- *
1874- * In the above example, after `a` is resolved to a struct-type column, we may fail to resolve `b`
1875- * if there is no such nested field named "b". We should not fail and wait for other rules to
1876- * resolve it if possible.
1877- */
1878- def resolveExpressionByPlanOutput (
1879- expr : Expression ,
1880- plan : LogicalPlan ,
1881- throws : Boolean = false ): Expression = {
1882- resolveExpression(
1883- expr,
1884- resolveColumnByName = nameParts => {
1885- plan.resolve(nameParts, resolver)
1886- },
1887- getAttrCandidates = () => plan.output,
1888- throws = throws)
1889- }
1890-
1891- /**
1892- * Resolves `UnresolvedAttribute`, `GetColumnByOrdinal` and extract value expressions(s) by the
1893- * input plan's children output attributes.
1894- *
1895- * @param e The expression need to be resolved.
1896- * @param q The LogicalPlan whose children are used to resolve expression's attribute.
1897- * @return resolved Expression.
1898- */
1899- def resolveExpressionByPlanChildren (
1900- e : Expression ,
1901- q : LogicalPlan ): Expression = {
1902- resolveExpression(
1903- e,
1904- resolveColumnByName = nameParts => {
1905- q.resolveChildren(nameParts, resolver)
1906- },
1907- getAttrCandidates = () => {
1908- assert(q.children.length == 1 )
1909- q.children.head.output
1910- },
1911- throws = true )
1912- }
1913-
19141934 /**
19151935 * In many dialects of SQL it is valid to use ordinal positions in order/sort by and group by
19161936 * clauses. This rule is to convert ordinal positions to the corresponding expressions in the
0 commit comments