@@ -57,8 +57,6 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
     TestHive.setConf(SQLConf.COLUMN_BATCH_SIZE, 5)
     // Enable in-memory partition pruning for testing purposes
     TestHive.setConf(SQLConf.IN_MEMORY_PARTITION_PRUNING, true)
-    // Use Hive's hash expression instead of the native one
-    TestHive.sessionState.functionRegistry.unregisterFunction("hash")
     // Ensures that plan generation uses the metastore relation rather than OrcRelation,
     // because SqlBuilder does not work with plans that contain a logical relation
     TestHive.setConf(HiveUtils.CONVERT_METASTORE_ORC, false)
@@ -76,7 +74,6 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
     TestHive.setConf(SQLConf.IN_MEMORY_PARTITION_PRUNING, originalInMemoryPartitionPruning)
     TestHive.setConf(HiveUtils.CONVERT_METASTORE_ORC, originalConvertMetastoreOrc)
     TestHive.setConf(SQLConf.CROSS_JOINS_ENABLED, originalCrossJoinEnabled)
-    TestHive.sessionState.functionRegistry.restore()

     // For debugging, dump some statistics about how much time was spent in various optimizer rules
     logWarning(RuleExecutor.dumpTimeSpent())
@@ -553,7 +550,42 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
     "union31",
     "union_date",
     "varchar_2",
-    "varchar_join1"
+    "varchar_join1",
+
+    // These tests depend on Hive's hash function, which differs from Spark's native one
+    "auto_join_nulls",
+    "auto_join0",
+    "auto_join1",
+    "auto_join2",
+    "auto_join3",
+    "auto_join4",
+    "auto_join5",
+    "auto_join6",
+    "auto_join7",
+    "auto_join8",
+    "auto_join9",
+    "auto_join10",
+    "auto_join11",
+    "auto_join12",
+    "auto_join13",
+    "auto_join14",
+    "auto_join14_hadoop20",
+    "auto_join15",
+    "auto_join17",
+    "auto_join18",
+    "auto_join19",
+    "auto_join20",
+    "auto_join22",
+    "auto_join25",
+    "auto_join30",
+    "auto_join31",
+    "correlationoptimizer1",
+    "correlationoptimizer2",
+    "correlationoptimizer3",
+    "correlationoptimizer4",
+    "multiMapJoin1",
+    "orc_dictionary_threshold",
+    "udf_hash"
   )

/**
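For context on why these tests moved to the exclusion list: Spark's built-in hash expression is Murmur3-based, while Hive's hash follows Java hashCode-style semantics, so any golden answer that embeds hash values, or depends on hash-sensitive row ordering, diverges once the suite stops swapping in Hive's implementation. Below is a minimal sketch of the divergence, assuming a local session; the HashDivergenceDemo object is illustrative and not part of this PR.

// Hypothetical standalone check, not part of this PR.
import org.apache.spark.sql.SparkSession

object HashDivergenceDemo extends App {
  val spark = SparkSession.builder()
    .master("local[1]")
    .appName("hash-divergence-demo")
    .getOrCreate()

  // Spark's native `hash` is Murmur3-based; Hive returns different values
  // for the same inputs, so golden files generated with Hive no longer match.
  spark.sql("SELECT hash(42) AS hash_int, hash('spark') AS hash_str").show()

  spark.stop()
}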
@@ -573,39 +605,6 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
     "annotate_stats_part",
     "annotate_stats_table",
     "annotate_stats_union",
-    "auto_join0",
-    "auto_join1",
-    "auto_join10",
-    "auto_join11",
-    "auto_join12",
-    "auto_join13",
-    "auto_join14",
-    "auto_join14_hadoop20",
-    "auto_join15",
-    "auto_join17",
-    "auto_join18",
-    "auto_join19",
-    "auto_join2",
-    "auto_join20",
-    "auto_join21",
-    "auto_join22",
-    "auto_join23",
-    "auto_join24",
-    "auto_join25",
-    "auto_join26",
-    "auto_join27",
-    "auto_join28",
-    "auto_join3",
-    "auto_join30",
-    "auto_join31",
-    "auto_join4",
-    "auto_join5",
-    "auto_join6",
-    "auto_join7",
-    "auto_join8",
-    "auto_join9",
-    "auto_join_nulls",
-    "auto_join_reordering_values",
     "binary_constant",
     "binarysortable_1",
     "cast1",
@@ -618,15 +617,11 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
     "compute_stats_long",
     "compute_stats_string",
     "convert_enum_to_string",
-    "correlationoptimizer1",
     "correlationoptimizer10",
     "correlationoptimizer11",
     "correlationoptimizer13",
     "correlationoptimizer14",
     "correlationoptimizer15",
-    "correlationoptimizer2",
-    "correlationoptimizer3",
-    "correlationoptimizer4",
     "correlationoptimizer6",
     "correlationoptimizer7",
     "correlationoptimizer8",
@@ -867,7 +862,6 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
     "merge2",
     "merge4",
     "mergejoins",
-    "multiMapJoin1",
     "multiMapJoin2",
     "multi_insert_gby",
     "multi_insert_gby3",
@@ -889,7 +883,6 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
     "nullinput2",
     "nullscript",
     "optional_outer",
-    "orc_dictionary_threshold",
     "order",
     "order2",
     "outer_join_ppr",
@@ -1022,7 +1015,6 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
     "udf_from_unixtime",
     "udf_greaterthan",
     "udf_greaterthanorequal",
-    "udf_hash",
     "udf_hex",
     "udf_if",
     "udf_index",
@@ -235,7 +235,6 @@ private[sql] class HiveSessionCatalog(
   // parse_url_tuple, posexplode, reflect2,
   // str_to_map, windowingtablefunction.
   private val hiveFunctions = Seq(
-    "hash",
     "histogram_numeric",
     "percentile",
     "percentile_approx"
@@ -487,24 +487,6 @@ private[hive] class TestHiveQueryExecution(
   }
 }

-
-private[hive] class TestHiveFunctionRegistry extends SimpleFunctionRegistry {
-
-  private val removedFunctions =
-    collection.mutable.ArrayBuffer.empty[(String, (ExpressionInfo, FunctionBuilder))]
-
-  def unregisterFunction(name: String): Unit = synchronized {
-    functionBuilders.remove(name).foreach(f => removedFunctions += name -> f)
-  }
-
-  def restore(): Unit = synchronized {
-    removedFunctions.foreach {
-      case (name, (info, builder)) => registerFunction(name, info, builder)
-    }
-  }
-}
-
-
 private[hive] class TestHiveSessionState(
   sparkSession: TestHiveSparkSession)
Inline review comment on the removed TestHiveFunctionRegistry:

Contributor: We can still remove this class if we add back the removed Spark built-in hash function manually, right?

Member (author): Yeah, I think so.
extends HiveSessionState(sparkSession) { self =>
@@ -520,16 +502,6 @@ private[hive] class TestHiveSessionState(
     }
   }

-  override lazy val functionRegistry: TestHiveFunctionRegistry = {
-    // We use TestHiveFunctionRegistry here to track functions that have been explicitly
-    // unregistered (through the TestHiveFunctionRegistry.unregisterFunction method).
-    val fr = new TestHiveFunctionRegistry
-    org.apache.spark.sql.catalyst.analysis.FunctionRegistry.expressions.foreach {
-      case (name, (info, builder)) => fr.registerFunction(name, info, builder)
-    }
-    fr
-  }
-
   override def executePlan(plan: LogicalPlan): TestHiveQueryExecution = {
     new TestHiveQueryExecution(sparkSession, plan)
   }
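A sketch of the manual approach the reviewer suggests above: instead of keeping a registry subclass that records removals, a test could re-register the built-in straight from FunctionRegistry.expressions after unregistering it. This is an illustration of the idea, not code from the PR; it reuses the expressions map and the registerFunction(name, info, builder) signature visible in the removed code above.

// Hypothetical sketch, assuming the APIs shown in the removed code above.
import org.apache.spark.sql.catalyst.analysis.FunctionRegistry

val registry = TestHive.sessionState.functionRegistry

// Put the built-in `hash` back from the canonical expression table.
FunctionRegistry.expressions.get("hash").foreach {
  case (info, builder) => registry.registerFunction("hash", info, builder)
}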