Skip to content
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
clean up tests a bit
  • Loading branch information
stefankandic committed Jun 25, 2024
commit b8594088a3b1c437f084ad03d084046ba35317c5
Original file line number Diff line number Diff line change
Expand Up @@ -352,18 +352,6 @@ case class StringContains(attribute: String, value: String) extends Filter {
Array(toV2Column(attribute), LiteralValue(UTF8String.fromString(value), StringType)))
}

/**
* A filter paired with a flag recording whether it is a complete translation.
* NOTE(review): presumably a filter that is not fully translated still requires the original
* expression to be evaluated — confirm against the call sites.
* @param filter the translated filter.
* @param fullyTranslated whether `filter` was fully translated.
*/
@Evolving
case class TranslatedFilter(filter: Filter, fullyTranslated: Boolean) {
// Returns a copy with `filter` replaced by `newFilter`; `fullyTranslated` is carried over.
def withFilter(newFilter: Filter): TranslatedFilter = {
copy(filter = newFilter)
}
}

/**
* A filter that always evaluates to `true`.
*
Expand Down Expand Up @@ -393,3 +381,14 @@ case class AlwaysFalse() extends Filter {
/** Singleton companion that is itself an instance of the `AlwaysFalse` case class. */
@Evolving
object AlwaysFalse extends AlwaysFalse

/**
 * A filter together with a flag telling whether it is a complete translation. A partially
 * translated filter can still be pushed down, but in that case the original expression has to
 * be evaluated as well.
 *
 * @param filter the translated filter
 * @param fullyTranslated `true` when `filter` is a full translation; `false` when the original
 *                        expression must also be evaluated
 */
@Evolving
case class TranslatedFilter(filter: Filter, fullyTranslated: Boolean) {

  /** Returns a new instance holding `newFilter`, keeping `fullyTranslated` as it is. */
  def withFilter(newFilter: Filter): TranslatedFilter =
    TranslatedFilter(filter = newFilter, fullyTranslated = fullyTranslated)
}
Original file line number Diff line number Diff line change
Expand Up @@ -716,7 +716,8 @@ object DataSourceStrategy
// Data source filters that cannot be handled by `relation`. An unhandled filter means
// the data source cannot guarantee the rows returned can pass the filter.
// As a result we must return it so Spark can plan an extra filter operator.
val unhandledFilters = relation.unhandledFilters(pushedFilters.toArray).toSet
val unhandledFilters = relation.unhandledFilters(
translatedMap.values.map(_.filter).toArray).toSet
val unhandledPredicates = translatedMap.filter { case (p, f) =>
unhandledFilters.contains(f.filter)
}.keys
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,13 +44,15 @@ class CollatedFilterPushDownToReadersSuite extends QueryTest
expectedPushedFilters: Seq[String],
expectedRowCount: Int): Unit = {
def testPushDown(dataSource: String, useV1: Boolean): Unit = {
val v1Source = if (useV1) dataSource else ""
withSQLConf(SQLConf.USE_V1_SOURCE_LIST.key -> v1Source) {
withTestTable(dataSource) {
val df = sql(s"SELECT * FROM $tblName WHERE $filterString")
val actualPushedFilters = getPushedFilters(df)
assert(actualPushedFilters.sorted === expectedPushedFilters.sorted)
assert(df.count() === expectedRowCount)
test(s"collation push down filter: $filterString, source: $dataSource, isV1: $useV1") {
val v1Source = if (useV1) dataSource else ""
withSQLConf(SQLConf.USE_V1_SOURCE_LIST.key -> v1Source) {
withTestTable(dataSource) {
val df = sql(s"SELECT * FROM $tblName WHERE $filterString")
val actualPushedFilters = getPushedFilters(df)
assert(actualPushedFilters.sorted === expectedPushedFilters.sorted)
assert(df.count() === expectedRowCount)
}
}
}
}
Expand Down Expand Up @@ -92,52 +94,44 @@ class CollatedFilterPushDownToReadersSuite extends QueryTest
}
}

// Checks which filters are pushed down, and how many rows come back, for predicates over
// collated columns. Each case passes the WHERE-clause text, the expected pushed-filter strings
// and the expected row count to testV1AndV2PushDown.
test("collation filter push down for v1 and v2 sources") {
  // Comparison between two collated literals.
  testV1AndV2PushDown(
    filterString = s"'aaa' COLLATE UNICODE = 'bbb' COLLATE UNICODE",
    expectedPushedFilters = Seq.empty,
    expectedRowCount = 0)

  // Equality on the collated column.
  testV1AndV2PushDown(
    filterString = s"$collatedCol = 'aaa'",
    expectedPushedFilters = Seq("AlwaysTrue()", s"IsNotNull($collatedCol)"),
    expectedRowCount = 2)

  // Disjunction mixing a collated and a non-collated column.
  testV1AndV2PushDown(
    filterString = s"$collatedCol = 'aaa' OR $nonCollatedCol = 'aaa'",
    expectedPushedFilters = Seq(s"Or(AlwaysTrue(),EqualTo($nonCollatedCol,aaa))"),
    expectedRowCount = 2)

  // Inequality on the collated column.
  testV1AndV2PushDown(
    filterString = s"$collatedCol != 'aaa'",
    expectedPushedFilters = Seq("AlwaysTrue()", s"IsNotNull($collatedCol)"),
    expectedRowCount = 1)

  // Negated equality on the collated column.
  testV1AndV2PushDown(
    filterString = s"NOT($collatedCol == 'aaa')",
    expectedPushedFilters = Seq("AlwaysTrue()", s"IsNotNull($collatedCol)"),
    expectedRowCount = 1)

  // Equality on a collated struct field (this case used to be listed twice, verbatim).
  testV1AndV2PushDown(
    filterString = s"$collatedStructFieldAccess = 'aaa'",
    expectedPushedFilters = Seq(
      "AlwaysTrue()", s"IsNotNull($collatedStructFieldAccess)"),
    expectedRowCount = 2)

  // Equality on a collated array column.
  testV1AndV2PushDown(
    filterString = s"$collatedArrayCol = array(collate('aaa', $lcaseCollation))",
    expectedPushedFilters = Seq("AlwaysTrue()", s"IsNotNull($collatedArrayCol)"),
    expectedRowCount = 2)

  // Inequality on the keys of a collated map column.
  testV1AndV2PushDown(
    filterString = s"map_keys($collatedMapCol) != array(collate('aaa', $lcaseCollation))",
    expectedPushedFilters = Seq("AlwaysTrue()", s"IsNotNull($collatedMapCol)"),
    expectedRowCount = 1)
}
// Push-down cases for predicates over collated columns. Each call supplies the WHERE-clause
// text, the string forms of the filters expected to be pushed, and the expected row count.
// NOTE(review): these calls sit directly in the class body, so test registration presumably
// happens inside testV1AndV2PushDown — confirm.
testV1AndV2PushDown(
filterString = s"'aaa' COLLATE UNICODE = 'bbb' COLLATE UNICODE",
expectedPushedFilters = Seq.empty,
expectedRowCount = 0)

testV1AndV2PushDown(
filterString = s"$collatedCol = 'aaa'",
expectedPushedFilters = Seq("AlwaysTrue()", s"IsNotNull($collatedCol)"),
expectedRowCount = 2)

testV1AndV2PushDown(
filterString = s"$collatedCol = 'aaa' OR $nonCollatedCol = 'aaa'",
expectedPushedFilters = Seq(s"Or(AlwaysTrue(),EqualTo($nonCollatedCol,aaa))"),
expectedRowCount = 2)

testV1AndV2PushDown(
filterString = s"$collatedCol != 'aaa'",
expectedPushedFilters = Seq("AlwaysTrue()", s"IsNotNull($collatedCol)"),
expectedRowCount = 1)

testV1AndV2PushDown(
filterString = s"NOT($collatedCol == 'aaa')",
expectedPushedFilters = Seq("AlwaysTrue()", s"IsNotNull($collatedCol)"),
expectedRowCount = 1)

testV1AndV2PushDown(
filterString = s"$collatedStructFieldAccess = 'aaa'",
expectedPushedFilters = Seq(
"AlwaysTrue()", s"IsNotNull($collatedStructFieldAccess)"),
expectedRowCount = 2)

testV1AndV2PushDown(
filterString = s"$collatedArrayCol = array(collate('aaa', $lcaseCollation))",
expectedPushedFilters = Seq("AlwaysTrue()", s"IsNotNull($collatedArrayCol)"),
expectedRowCount = 2)

testV1AndV2PushDown(
filterString = s"map_keys($collatedMapCol) != array(collate('aaa', $lcaseCollation))",
expectedPushedFilters = Seq("AlwaysTrue()", s"IsNotNull($collatedMapCol)"),
expectedRowCount = 1)
}