Skip to content
Prev Previous commit
Next Next commit
just cache external function name
  • Loading branch information
kevinyu98 committed Jul 10, 2018
commit 5c6687c12656453d6fdc43ae8b1dee759c9ee294
Original file line number Diff line number Diff line change
Expand Up @@ -1207,29 +1207,32 @@ class Analyzer(
* only performs simple existence check according to the function identifier to quickly identify
* undefined functions without triggering relation resolution, which may incur potentially
* expensive partition/schema discovery process in some cases.
*
* In order to avoid duplicate external functions lookup, the external function identifier will
* store in the local hash set externalFunctionNameSet.
* @see [[ResolveFunctions]]
* @see https://issues.apache.org/jira/browse/SPARK-19737
*/
object LookupFunctions extends Rule[LogicalPlan] {
override def apply(plan: LogicalPlan): LogicalPlan = {
val catalogFunctionNameSet = new mutable.HashSet[FunctionIdentifier]()
val externalFunctionNameSet = new mutable.HashSet[FunctionIdentifier]()
plan.transformAllExpressions {
case f: UnresolvedFunction
if catalogFunctionNameSet.contains(normalizeFuncName(f.name)) => f
case f: UnresolvedFunction if catalog.functionExists(f.name) =>
catalogFunctionNameSet.add(normalizeFuncName(f.name))
if externalFunctionNameSet.contains(normalizeFuncName(f.name)) => f
case f: UnresolvedFunction if catalog.buildinFunctionExists(f.name) => f
case f: UnresolvedFunction if catalog.externalFunctionExists(f.name) =>
externalFunctionNameSet.add(normalizeFuncName(f.name))
f
case f: UnresolvedFunction =>
withPosition(f) {
throw new NoSuchFunctionException(f.name.database.getOrElse("default"),
throw new NoSuchFunctionException(f.name.database.getOrElse(catalog.getCurrentDatabase),
f.name.funcName)
}
}
}

private def normalizeFuncName(name: FunctionIdentifier): FunctionIdentifier = {
FunctionIdentifier(name.funcName.toLowerCase(Locale.ROOT), name.database)
FunctionIdentifier(name.funcName.toLowerCase(Locale.ROOT),
name.database.orElse(Some(catalog.getCurrentDatabase)))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@kevinyu98 I have a question. So we normalize the funcName here. How about name.database ? Is that normalized already by the time we are here ?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@kevinyu98 how about consideration of conf.caseSensitiveAnalysis ?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes, I will change the code for the name.database. Thanks.

}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1103,6 +1103,16 @@ class SessionCatalog(
externalCatalog.functionExists(db, name.funcName)
}

def buildinFunctionExists(name: FunctionIdentifier): Boolean = {
functionRegistry.functionExists(name)
}

def externalFunctionExists(name: FunctionIdentifier): Boolean = {
val db = formatDatabaseName(name.database.getOrElse(getCurrentDatabase))
requireDbExists(db)
externalCatalog.functionExists(db, name.funcName)
}

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To avoid code duplication, you should modify the functionExists like:

def functionExists(name: FunctionIdentifier): Boolean = {
  buildinFunctionExists(name) || externalFunctionExists(name)
}

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok, I will change.

// ----------------------------------------------------------------
// | Methods that interact with temporary and metastore functions |
// ----------------------------------------------------------------
Expand Down