Skip to content
Prev Previous commit
Next Next commit
Avoid Hive Metastore stack overflow
  • Loading branch information
ulysses-you committed Feb 26, 2021
commit b9c7f44cd027b01b7add0430560ceadbe4096625
Original file line number Diff line number Diff line change
Expand Up @@ -863,7 +863,9 @@ object SQLConf {
.doc("The threshold of set size for InSet predicate when pruning partitions through Hive " +
"Metastore. When the set size exceeds the threshold, we rewrite the InSet predicate " +
"to be greater than or equal to the minimum value in set and less than or equal to the " +
"maximum value in set. Larger values may cause Hive Metastore stack overflow.")
"maximum value in set. Larger values may cause Hive Metastore stack overflow. But if " +
"the predicate of Not InSet which values exceeds the threshold, we won't to push it " +
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

won't to push -> won't push

"to Hive Metastore.")
.version("3.1.0")
.internal()
.intConf
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -783,6 +783,9 @@ private[client] class Shim_v0_13 extends Shim_v0_12 {
convert(And(GreaterThanOrEqual(child, Literal(sortedValues.head, dataType)),
LessThanOrEqual(child, Literal(sortedValues.last, dataType))))

case Not(InSet(_, values)) if values.size > inSetThreshold =>
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can we move this to the beginning so that unsupported cases are grouped together?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

moved.

None

case InSet(child @ ExtractAttribute(SupportedAttribute(name)), ExtractableDateValues(values))
if useAdvanced && child.dataType == DateType =>
Some(convertInToOr(name, values))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -211,5 +211,13 @@ class FiltersSuite extends SparkFunSuite with Logging with PlanTest {
}
}

// With the InSet pruning threshold set to 2, a Not(InSet) predicate carrying three
// values exceeds the threshold; the filter string produced by the shim must be empty.
test("Don't push not inset if it's values exceeds the threshold") {
  val thresholdKey = SQLConf.HIVE_METASTORE_PARTITION_PRUNING_INSET_THRESHOLD.key
  withSQLConf(thresholdKey -> "2") {
    val partitionAttr = a("p", IntegerType)
    val notInSet = Not(InSet(partitionAttr, Set(1, 2, 3)))
    val pushedDown = shim.convertFilters(testTable, Seq(notInSet), conf.sessionLocalTimeZone)
    assert(pushedDown.isEmpty)
  }
}

/**
 * Test shorthand: builds an [[AttributeReference]] with the given name and data type,
 * leaving the second parameter list at its defaults.
 *
 * @param name     attribute name
 * @param dataType attribute data type
 * @return the newly created attribute reference
 */
private def a(name: String, dataType: DataType): AttributeReference = {
  AttributeReference(name, dataType)()
}
}