-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-2594][SQL] Support CACHE TABLE <name> AS SELECT ... #2397
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 17 commits
b803fc8
4e858d8
13c8e27
7459ce3
6758f80
eebc0c1
b5276b2
dc33895
aaf5b59
724b9db
e3265d0
bc0bffc
d8b37b2
8c9993c
fb1759b
394d5ca
c18aa38
d6e469d
8059cd2
a5f0beb
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -166,3 +166,22 @@ case class DescribeCommand(child: SparkPlan, output: Seq[Attribute])( | |
| child.output.map(field => Row(field.name, field.dataType.toString, null)) | ||
| } | ||
| } | ||
|
|
||
| /** | ||
| * :: DeveloperApi :: | ||
| */ | ||
| @DeveloperApi | ||
| case class CacheTableAsSelectCommand(tableName: String, plan: LogicalPlan) | ||
| extends LeafNode with Command { | ||
|
|
||
| override protected[sql] lazy val sideEffectResult = { | ||
| sqlContext.catalog.registerTable(None, tableName, sqlContext.executePlan(plan).analyzed) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment The reason will be displayed to describe this comment to others. Learn more. (Probably my final comment on this PR :) ) As described in PR #2382, we shouldn't store the analyzed logical plan when registering tables anymore (see here). To prevent duplicated code, I'd suggest to import sqlContext.executePlan(plan).logical.registerTempTable(tableName)
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thank you for your comment. It is a good idea to import
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ah, yes you're right, we can use
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It seems we cannot use the
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sorry again, you're right, I mistook
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Updated the code. Please review |
||
| sqlContext.cacheTable(tableName) | ||
| // It does the caching eager. | ||
| sqlContext.table(tableName).count | ||
| Seq.empty[Row] | ||
| } | ||
|
|
||
| override def output: Seq[Attribute] = Seq.empty | ||
|
|
||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -229,7 +229,13 @@ private[hive] object HiveQl { | |
| SetCommand(Some(key), Some(value)) | ||
| } | ||
| } else if (sql.trim.toLowerCase.startsWith("cache table")) { | ||
| CacheCommand(sql.trim.drop(12).trim, true) | ||
| sql.trim.drop(12).trim.split(" ").toSeq match { | ||
| case Seq(tableName) => | ||
| CacheCommand(tableName, true) | ||
| case Seq(tableName,as, select@_*) => | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Space after
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I will suggest to move the
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @liancheng Added space after @chenghao-intel I can see a difference between
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @chenghao-intel I agree that currently our HiveQL syntax extension scheme is quite hacky and brittle in Spark SQL... Other commands like
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thank you @ravipesala @liancheng , let's improve that in the future. :) |
||
| CacheTableAsSelectCommand(tableName, | ||
| createPlan(sql.trim.drop(12 + tableName.length() + as.length() + 2))) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think this branch can be simplified as: case Seq(tableName, _, select @ _*) =>
CacheTableAsSelectCommand(tableName, createPlan(select.mkString(" ").trim)
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment The reason will be displayed to describe this comment to others. Learn more. Thank you for your suggestion. It simplifies the code well. I have modified it. |
||
| } | ||
| } else if (sql.trim.toLowerCase.startsWith("uncache table")) { | ||
| CacheCommand(sql.trim.drop(14).trim, false) | ||
| } else if (sql.trim.toLowerCase.startsWith("add jar")) { | ||
|
|
@@ -243,15 +249,7 @@ private[hive] object HiveQl { | |
| } else if (sql.trim.startsWith("!")) { | ||
| ShellCommand(sql.drop(1)) | ||
| } else { | ||
| val tree = getAst(sql) | ||
| if (nativeCommands contains tree.getText) { | ||
| NativeCommand(sql) | ||
| } else { | ||
| nodeToPlan(tree) match { | ||
| case NativePlaceholder => NativeCommand(sql) | ||
| case other => other | ||
| } | ||
| } | ||
| createPlan(sql) | ||
| } | ||
| } catch { | ||
| case e: Exception => throw new ParseException(sql, e) | ||
|
|
@@ -262,6 +260,19 @@ private[hive] object HiveQl { | |
| """.stripMargin) | ||
| } | ||
| } | ||
|
|
||
| /** Creates LogicalPlan for a given HiveQL string. */ | ||
| def createPlan(sql: String) = { | ||
| val tree = getAst(sql) | ||
| if (nativeCommands contains tree.getText) { | ||
| NativeCommand(sql) | ||
| } else { | ||
| nodeToPlan(tree) match { | ||
| case NativePlaceholder => NativeCommand(sql) | ||
| case other => other | ||
| } | ||
| } | ||
| } | ||
|
|
||
| def parseDdl(ddl: String): Seq[Attribute] = { | ||
| val tree = | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We can remove `as ~` if we use `opt(AS ~> select)` in line 186.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thank you for your suggestion. I have updated it as per your comment.