Skip to content

Commit a57f87e

Browse files
committed
[SPARK-13454][SQL] Allow users to drop a table with a name starting with an underscore.
## What changes were proposed in this pull request? This change adds a workaround to allow users to drop a table with a name starting with an underscore. Without this patch, we can create such a table, but we cannot drop it. The reason is that Hive's parser unquotes a quoted identifier (see https://github.com/apache/hive/blob/release-1.2.1/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g#L453). So, when we issue a drop table command to Hive, a table name starting with an underscore is actually not quoted. Then, Hive will complain about it because it does not support a table name starting with an underscore without using backticks (underscores are allowed as long as they are not the first character). ## How was this patch tested? Added a test to make sure we can drop a table with a name starting with an underscore. https://issues.apache.org/jira/browse/SPARK-13454 Author: Yin Huai <[email protected]> Closes #11349 from yhuai/fixDropTable.
1 parent abe8f99 commit a57f87e

File tree

2 files changed

+39
-2
lines changed

2 files changed

+39
-2
lines changed

sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/commands.scala

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ package org.apache.spark.sql.hive.execution
2020
import org.apache.hadoop.hive.metastore.MetaStoreUtils
2121

2222
import org.apache.spark.sql._
23-
import org.apache.spark.sql.catalyst.TableIdentifier
23+
import org.apache.spark.sql.catalyst.{SqlParser, TableIdentifier}
2424
import org.apache.spark.sql.catalyst.analysis.EliminateSubQueries
2525
import org.apache.spark.sql.catalyst.expressions.Attribute
2626
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
@@ -70,7 +70,23 @@ case class DropTable(
7070
case e: Throwable => log.warn(s"${e.getMessage}", e)
7171
}
7272
hiveContext.invalidateTable(tableName)
73-
hiveContext.runSqlHive(s"DROP TABLE $ifExistsClause$tableName")
73+
val tableNameForHive = {
74+
// Hive's parser will unquote an identifier (see the rule of QuotedIdentifier in
75+
// HiveLexer.g of Hive 1.2.1). For the DROP TABLE command that we pass in Hive, we
76+
// will use the quoted form (`db`.`tableName`) if the table name starts with a _.
76+
// Otherwise, we keep the unquoted form (db.tableName), which is the same as tableName
78+
// passed into this DropTable class. Please note that although QuotedIdentifier rule
79+
// allows backticks appearing in an identifier, Hive does not actually allow such
80+
// an identifier to be a table name. So, we do not check if a table name part has
81+
// any backtick or not.
82+
//
83+
// This change lives here because this patch is just for the 1.6 branch and we try to
84+
// avoid affecting normal cases (tables that do not use _ as the first character of
85+
// their name).
86+
val identifier = SqlParser.parseTableIdentifier(tableName)
87+
if (identifier.table.startsWith("_")) identifier.quotedString else identifier.unquotedString
88+
}
89+
hiveContext.runSqlHive(s"DROP TABLE $ifExistsClause$tableNameForHive")
7490
hiveContext.catalog.unregisterTable(TableIdentifier(tableName))
7591
Seq.empty[Row]
7692
}

sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveMetastoreCatalogSuite.scala

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,27 @@ class HiveMetastoreCatalogSuite extends SparkFunSuite with TestHiveSingleton {
4747
logInfo(df.queryExecution.toString)
4848
df.as('a).join(df.as('b), $"a.key" === $"b.key")
4949
}
50+
51+
test("SPARK-13454: drop a table with a name starting with underscore") {
52+
hiveContext.range(10).write.saveAsTable("_spark13454")
53+
hiveContext.range(20).registerTempTable("_spark13454")
54+
// This will drop both the metastore table and the temp table.
55+
hiveContext.sql("drop table `_spark13454`")
56+
assert(hiveContext.tableNames().filter(name => name == "_spark13454").length === 0)
57+
58+
hiveContext.range(10).write.saveAsTable("_spark13454")
59+
hiveContext.range(20).registerTempTable("_spark13454")
60+
hiveContext.sql("drop table default.`_spark13454`")
61+
// The drop above removes only the metastore table and keeps the temp table.
62+
assert(hiveContext.tableNames().filter(name => name == "_spark13454").length === 1)
63+
// Make sure it is the temp table.
64+
assert(hiveContext.table("_spark13454").count() === 20)
65+
hiveContext.sql("drop table if exists `_spark13454`")
66+
assert(hiveContext.tableNames().filter(name => name == "_spark13454").length === 0)
67+
68+
hiveContext.range(10).write.saveAsTable("spark13454")
69+
hiveContext.sql("drop table spark13454")
70+
}
5071
}
5172

5273
class DataSourceWithHiveMetastoreCatalogSuite

0 commit comments

Comments
 (0)