Skip to content

Commit c9a6762

Browse files
petermaxleerxin
authored andcommitted
[SPARK-16199][SQL] Add a method to list the referenced columns in data source Filter
## What changes were proposed in this pull request? It would be useful to support listing the columns that are referenced by a filter. This can help simplify data source planning, because with this we would be able to implement unhandledFilters method in HadoopFsRelation. This is based on rxin's patch (#13901) and adds unit tests. ## How was this patch tested? Added a new suite FiltersSuite. Author: petermaxlee <[email protected]> Author: Reynold Xin <[email protected]> Closes #14120 from petermaxlee/SPARK-16199.
1 parent b1e5281 commit c9a6762

File tree

3 files changed

+151
-16
lines changed

3 files changed

+151
-16
lines changed

project/MimaExcludes.scala

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,12 @@ import com.typesafe.tools.mima.core.ProblemFilters._
3535
object MimaExcludes {
3636

3737
// Exclude rules for 2.1.x
38-
lazy val v21excludes = v20excludes
38+
lazy val v21excludes = v20excludes ++ {
39+
Seq(
40+
// [SPARK-16199][SQL] Add a method to list the referenced columns in data source Filter
41+
ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.sql.sources.Filter.references")
42+
)
43+
}
3944

4045
// Exclude rules for 2.0.x
4146
lazy val v20excludes = {

sql/core/src/main/scala/org/apache/spark/sql/sources/filters.scala

Lines changed: 56 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -26,15 +26,28 @@ package org.apache.spark.sql.sources
2626
*
2727
* @since 1.3.0
2828
*/
29-
abstract class Filter
29+
abstract class Filter {
30+
/**
31+
* List of columns that are referenced by this filter.
32+
* @since 2.1.0
33+
*/
34+
def references: Array[String]
35+
36+
protected def findReferences(value: Any): Array[String] = value match {
37+
case f: Filter => f.references
38+
case _ => Array.empty
39+
}
40+
}
3041

3142
/**
3243
* A filter that evaluates to `true` iff the attribute evaluates to a value
3344
* equal to `value`.
3445
*
3546
* @since 1.3.0
3647
*/
37-
case class EqualTo(attribute: String, value: Any) extends Filter
48+
case class EqualTo(attribute: String, value: Any) extends Filter {
49+
override def references: Array[String] = Array(attribute) ++ findReferences(value)
50+
}
3851

3952
/**
4053
* Performs equality comparison, similar to [[EqualTo]]. However, this differs from [[EqualTo]]
@@ -43,39 +56,49 @@ case class EqualTo(attribute: String, value: Any) extends Filter
4356
*
4457
* @since 1.5.0
4558
*/
46-
case class EqualNullSafe(attribute: String, value: Any) extends Filter
59+
case class EqualNullSafe(attribute: String, value: Any) extends Filter {
60+
override def references: Array[String] = Array(attribute) ++ findReferences(value)
61+
}
4762

4863
/**
4964
* A filter that evaluates to `true` iff the attribute evaluates to a value
5065
* greater than `value`.
5166
*
5267
* @since 1.3.0
5368
*/
54-
case class GreaterThan(attribute: String, value: Any) extends Filter
69+
case class GreaterThan(attribute: String, value: Any) extends Filter {
70+
override def references: Array[String] = Array(attribute) ++ findReferences(value)
71+
}
5572

5673
/**
5774
* A filter that evaluates to `true` iff the attribute evaluates to a value
5875
* greater than or equal to `value`.
5976
*
6077
* @since 1.3.0
6178
*/
62-
case class GreaterThanOrEqual(attribute: String, value: Any) extends Filter
79+
case class GreaterThanOrEqual(attribute: String, value: Any) extends Filter {
80+
override def references: Array[String] = Array(attribute) ++ findReferences(value)
81+
}
6382

6483
/**
6584
* A filter that evaluates to `true` iff the attribute evaluates to a value
6685
* less than `value`.
6786
*
6887
* @since 1.3.0
6988
*/
70-
case class LessThan(attribute: String, value: Any) extends Filter
89+
case class LessThan(attribute: String, value: Any) extends Filter {
90+
override def references: Array[String] = Array(attribute) ++ findReferences(value)
91+
}
7192

7293
/**
7394
* A filter that evaluates to `true` iff the attribute evaluates to a value
7495
* less than or equal to `value`.
7596
*
7697
* @since 1.3.0
7798
*/
78-
case class LessThanOrEqual(attribute: String, value: Any) extends Filter
99+
case class LessThanOrEqual(attribute: String, value: Any) extends Filter {
100+
override def references: Array[String] = Array(attribute) ++ findReferences(value)
101+
}
79102

80103
/**
81104
* A filter that evaluates to `true` iff the attribute evaluates to one of the values in the array.
@@ -99,63 +122,81 @@ case class In(attribute: String, values: Array[Any]) extends Filter {
99122
override def toString: String = {
100123
s"In($attribute, [${values.mkString(",")}]"
101124
}
125+
126+
override def references: Array[String] = Array(attribute) ++ values.flatMap(findReferences)
102127
}
103128

104129
/**
105130
* A filter that evaluates to `true` iff the attribute evaluates to null.
106131
*
107132
* @since 1.3.0
108133
*/
109-
case class IsNull(attribute: String) extends Filter
134+
case class IsNull(attribute: String) extends Filter {
135+
override def references: Array[String] = Array(attribute)
136+
}
110137

111138
/**
112139
* A filter that evaluates to `true` iff the attribute evaluates to a non-null value.
113140
*
114141
* @since 1.3.0
115142
*/
116-
case class IsNotNull(attribute: String) extends Filter
143+
case class IsNotNull(attribute: String) extends Filter {
144+
override def references: Array[String] = Array(attribute)
145+
}
117146

118147
/**
119148
* A filter that evaluates to `true` iff both `left` or `right` evaluate to `true`.
120149
*
121150
* @since 1.3.0
122151
*/
123-
case class And(left: Filter, right: Filter) extends Filter
152+
case class And(left: Filter, right: Filter) extends Filter {
153+
override def references: Array[String] = left.references ++ right.references
154+
}
124155

125156
/**
126157
* A filter that evaluates to `true` iff at least one of `left` or `right` evaluates to `true`.
127158
*
128159
* @since 1.3.0
129160
*/
130-
case class Or(left: Filter, right: Filter) extends Filter
161+
case class Or(left: Filter, right: Filter) extends Filter {
162+
override def references: Array[String] = left.references ++ right.references
163+
}
131164

132165
/**
133166
* A filter that evaluates to `true` iff `child` is evaluated to `false`.
134167
*
135168
* @since 1.3.0
136169
*/
137-
case class Not(child: Filter) extends Filter
170+
case class Not(child: Filter) extends Filter {
171+
override def references: Array[String] = child.references
172+
}
138173

139174
/**
140175
* A filter that evaluates to `true` iff the attribute evaluates to
141176
* a string that starts with `value`.
142177
*
143178
* @since 1.3.1
144179
*/
145-
case class StringStartsWith(attribute: String, value: String) extends Filter
180+
case class StringStartsWith(attribute: String, value: String) extends Filter {
181+
override def references: Array[String] = Array(attribute)
182+
}
146183

147184
/**
148185
* A filter that evaluates to `true` iff the attribute evaluates to
149186
* a string that starts with `value`.
150187
*
151188
* @since 1.3.1
152189
*/
153-
case class StringEndsWith(attribute: String, value: String) extends Filter
190+
case class StringEndsWith(attribute: String, value: String) extends Filter {
191+
override def references: Array[String] = Array(attribute)
192+
}
154193

155194
/**
156195
* A filter that evaluates to `true` iff the attribute evaluates to
157196
* a string that contains the string `value`.
158197
*
159198
* @since 1.3.1
160199
*/
161-
case class StringContains(attribute: String, value: String) extends Filter
200+
case class StringContains(attribute: String, value: String) extends Filter {
201+
override def references: Array[String] = Array(attribute)
202+
}
Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
package org.apache.spark.sql.sources
19+
20+
import org.apache.spark.SparkFunSuite
21+
22+
/**
23+
* Unit test suites for data source filters.
24+
*/
25+
class FiltersSuite extends SparkFunSuite {
26+
27+
test("EqualTo references") {
28+
assert(EqualTo("a", "1").references.toSeq == Seq("a"))
29+
assert(EqualTo("a", EqualTo("b", "2")).references.toSeq == Seq("a", "b"))
30+
}
31+
32+
test("EqualNullSafe references") {
33+
assert(EqualNullSafe("a", "1").references.toSeq == Seq("a"))
34+
assert(EqualNullSafe("a", EqualTo("b", "2")).references.toSeq == Seq("a", "b"))
35+
}
36+
37+
test("GreaterThan references") {
38+
assert(GreaterThan("a", "1").references.toSeq == Seq("a"))
39+
assert(GreaterThan("a", EqualTo("b", "2")).references.toSeq == Seq("a", "b"))
40+
}
41+
42+
test("GreaterThanOrEqual references") {
43+
assert(GreaterThanOrEqual("a", "1").references.toSeq == Seq("a"))
44+
assert(GreaterThanOrEqual("a", EqualTo("b", "2")).references.toSeq == Seq("a", "b"))
45+
}
46+
47+
test("LessThan references") {
48+
assert(LessThan("a", "1").references.toSeq == Seq("a"))
49+
assert(LessThan("a", EqualTo("b", "2")).references.toSeq == Seq("a", "b"))
50+
}
51+
52+
test("LessThanOrEqual references") {
53+
assert(LessThanOrEqual("a", "1").references.toSeq == Seq("a"))
54+
assert(LessThanOrEqual("a", EqualTo("b", "2")).references.toSeq == Seq("a", "b"))
55+
}
56+
57+
test("In references") {
58+
assert(In("a", Array("1")).references.toSeq == Seq("a"))
59+
assert(In("a", Array("1", EqualTo("b", "2"))).references.toSeq == Seq("a", "b"))
60+
}
61+
62+
test("IsNull references") {
63+
assert(IsNull("a").references.toSeq == Seq("a"))
64+
}
65+
66+
test("IsNotNull references") {
67+
assert(IsNotNull("a").references.toSeq == Seq("a"))
68+
}
69+
70+
test("And references") {
71+
assert(And(EqualTo("a", "1"), EqualTo("b", "1")).references.toSeq == Seq("a", "b"))
72+
}
73+
74+
test("Or references") {
75+
assert(Or(EqualTo("a", "1"), EqualTo("b", "1")).references.toSeq == Seq("a", "b"))
76+
}
77+
78+
test("StringStartsWith references") {
79+
assert(StringStartsWith("a", "str").references.toSeq == Seq("a"))
80+
}
81+
82+
test("StringEndsWith references") {
83+
assert(StringEndsWith("a", "str").references.toSeq == Seq("a"))
84+
}
85+
86+
test("StringContains references") {
87+
assert(StringContains("a", "str").references.toSeq == Seq("a"))
88+
}
89+
}

0 commit comments

Comments
 (0)