-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-18217] [SQL] Disallow creating permanent views based on temporary views or UDFs #15764
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 3 commits
509327e
1b430bb
695110f
4dbd3b6
7100a8f
86e7f9d
a4df82b
1c3899f
fec0066
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -553,16 +553,16 @@ class SessionCatalog( | |
| val relationAlias = alias.getOrElse(table) | ||
| if (db == globalTempViewManager.database) { | ||
| globalTempViewManager.get(table).map { viewDef => | ||
| SubqueryAlias(relationAlias, viewDef, Some(name)) | ||
| SubqueryAlias(relationAlias, viewDef, Some(name))(isGeneratedByTempTable = true) | ||
| }.getOrElse(throw new NoSuchTableException(db, table)) | ||
| } else if (name.database.isDefined || !tempTables.contains(table)) { | ||
| val metadata = externalCatalog.getTable(db, table) | ||
| val view = Option(metadata.tableType).collect { | ||
| case CatalogTableType.VIEW => name | ||
| } | ||
| SubqueryAlias(relationAlias, SimpleCatalogRelation(db, metadata), view) | ||
| SubqueryAlias(relationAlias, SimpleCatalogRelation(db, metadata), view)() | ||
| } else { | ||
| SubqueryAlias(relationAlias, tempTables(table), Option(name)) | ||
| SubqueryAlias(relationAlias, tempTables(table), Option(name))(isGeneratedByTempTable = true) | ||
| } | ||
| } | ||
| } | ||
|
|
@@ -923,6 +923,24 @@ class SessionCatalog( | |
| } | ||
| } | ||
|
|
||
| /** | ||
| * Returns whether `name` refers to a temporary function. | ||
| * | ||
| * A function counts as temporary when all of the following hold: `name` carries no | ||
| * database, `functionRegistry` has an entry for `name.funcName`, `FunctionRegistry.builtin` | ||
| * has no entry for it, and its lower-cased name is not one of the names in `hiveFunctions`. | ||
| * | ||
| * @param name identifier of the function to check | ||
| * @return true if the function is temporary by the rule above, false otherwise | ||
| */ | ||
| def isTempFunction(name: FunctionIdentifier): Boolean = { | ||
| // Copied from HiveSessionCatalog; keep the two lists in sync when either one changes. | ||
| val hiveFunctions = Seq( | ||
| "hash", | ||
| "histogram_numeric", | ||
| "percentile") | ||
|
|
||
| // A temporary function is one that is registered in functionRegistry without a database | ||
| // name and is neither a built-in function nor one of the Hive functions listed above. | ||
| name.database.isEmpty && | ||
| functionRegistry.functionExists(name.funcName) && | ||
| !FunctionRegistry.builtin.functionExists(name.funcName) && | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Our built-in function registry is using Thus, no need to add
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
| !hiveFunctions.contains(name.funcName.toLowerCase) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I thought
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It is true - but we are working towards getting rid of HiveSessionCatalog though (including getting rid of the 3 fallback functions), so in practice this will make no difference soon. |
||
| } | ||
|
|
||
| /** Always throws a NoSuchFunctionException built from `currentDb` and `name`; never returns normally. */ protected def failFunctionLookup(name: String): Nothing = { | ||
| throw new NoSuchFunctionException(db = currentDb, func = name) | ||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -709,10 +709,13 @@ case class LocalLimit(limitExpr: Expression, child: LogicalPlan) extends UnaryNo | |
| /** Logical plan node whose output is `child.output` with every attribute's qualifier set to `alias`. */ case class SubqueryAlias( | ||
| alias: String, | ||
| child: LogicalPlan, | ||
| view: Option[TableIdentifier]) | ||
| view: Option[TableIdentifier])( | ||
| val isGeneratedByTempTable: java.lang.Boolean = false) /* second parameter list, defaults to false; SessionCatalog passes true when the relation comes from tempTables or globalTempViewManager */ | ||
|
||
| extends UnaryNode { | ||
|
|
||
| override def output: Seq[Attribute] = child.output.map(_.withQualifier(Some(alias))) | ||
|
|
||
| override protected def otherCopyArgs: Seq[AnyRef] = isGeneratedByTempTable :: Nil /* NOTE(review): presumably lets copies of this node carry over the flag from the second parameter list; confirm against TreeNode */ | ||
| } | ||
|
|
||
| /** | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -19,14 +19,14 @@ package org.apache.spark.sql.execution.command | |
|
|
||
| import scala.util.control.NonFatal | ||
|
|
||
| import org.apache.spark.sql.{AnalysisException, Dataset, Row, SparkSession} | ||
| import org.apache.spark.sql.{AnalysisException, Row, SparkSession} | ||
| import org.apache.spark.sql.catalyst.{SQLBuilder, TableIdentifier} | ||
| import org.apache.spark.sql.catalyst.analysis.UnresolvedFunction | ||
| import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable, CatalogTableType} | ||
| import org.apache.spark.sql.catalyst.expressions.Alias | ||
| import org.apache.spark.sql.catalyst.plans.QueryPlan | ||
| import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project} | ||
| import org.apache.spark.sql.execution.datasources.{DataSource, LogicalRelation} | ||
| import org.apache.spark.sql.types.{MetadataBuilder, StructType} | ||
| import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project, SubqueryAlias} | ||
| import org.apache.spark.sql.types.MetadataBuilder | ||
|
|
||
|
|
||
| /** | ||
|
|
@@ -131,6 +131,27 @@ case class CreateViewCommand( | |
| s"specified by CREATE VIEW (num: `${userSpecifiedColumns.length}`).") | ||
| } | ||
|
|
||
| // When creating a permanent view, not allowed to reference temporary objects. | ||
| if (!isTemporary) { | ||
|
||
| // Disallow creating permanent views based on temporary views. | ||
| analyzedPlan.collectFirst { | ||
| case s: SubqueryAlias if s.isGeneratedByTempTable => | ||
| throw new AnalysisException(s"Not allowed to create a permanent view $name by " + | ||
| s"referencing a temp view `${s.alias}`. " + | ||
| originalText.map(sql => s"""SQL: "$sql".""").getOrElse("")) | ||
|
||
| } | ||
|
|
||
| // Disallow creating permanent views based on temporary UDFs. | ||
| child.collect { | ||
| case other if !other.resolved => other.expressions.flatMap(_.collect { | ||
| case e: UnresolvedFunction if sparkSession.sessionState.catalog.isTempFunction(e.name) => | ||
| throw new AnalysisException(s"Not allowed to create a permanent view $name by " + | ||
| s"referencing a temp function `${e.name}`. " + | ||
|
||
| originalText.map(sql => s"""SQL: "$sql".""").getOrElse("")) | ||
|
||
| }) | ||
| } | ||
| } | ||
|
|
||
| val aliasedPlan = if (userSpecifiedColumns.isEmpty) { | ||
| analyzedPlan | ||
| } else { | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'd add a comment in HiveSessionCatalog reminding maintainers to update this list as well; otherwise the two lists will become inconsistent.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Will do. Thanks!