
Commit 3fc8c62

Verify filter pushdown through dataset typing (#130)
* Start adding a test for filter pushdown through datasets. * Quick example showing that we still push the filter down to the DataSourceV2 relation even with an as[] in the middle. Figured I'd add this since there was a blog post a while ago about Spark having problems with this, and some folks might still be avoiding typed datasets as a result.
1 parent b436ad7 commit 3fc8c62

File tree: 1 file changed, +48 −0 lines

Lines changed: 48 additions & 0 deletions
@@ -0,0 +1,48 @@
/**
 * Happy Panda Example for DataFrames.
 * Computes the % of happy pandas. Very contrived.
 */
package com.highperformancespark.examples.dataframe

import scala.collection.mutable
import scala.util.Random

import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.Row
import org.apache.spark.sql.{SQLContext, SparkSession}
import org.apache.spark.sql.types._

import com.highperformancespark.examples.dataframe.HappyPandas.PandaInfo
import com.highperformancespark.examples.dataframe.HappyPandas.Pandas
import com.holdenkarau.spark.testing._
import org.scalatest.funsuite.AnyFunSuite
import org.scalatest.matchers.should.Matchers._

case class ExtraMagic(
  place: String,
  pandaType: String,
  happyPandas: Integer,
  totalPandas: Integer,
  extraInfo: Integer)

class PandaPlaceFilterPushdown extends AnyFunSuite with DataFrameSuiteBase {

  override def appName: String = "pandaPlaceFilterPushdown"

  val basicList = List(
    ExtraMagic("a", "b", 1, 2, 3),
    ExtraMagic("toronto", "b", 1, 2, 3),
  )

  test("simpleFilterTest") {
    val sqlCtx = sqlContext
    import sqlCtx.implicits._
    val inputDF = sqlCtx.createDataFrame(basicList)
    val restrictedDF = inputDF.select($"place", $"pandaType", $"happyPandas", $"totalPandas")
    val switched = inputDF.as[PandaInfo]
    // Note if we write the filter with functional syntax it does not push down.
    val filtered = switched.filter($"place" === "a")
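    // (Hypothetical contrast, not part of the committed test: a lambda-style filter,
    //  e.g. switched.filter(_.place == "a"), is opaque to Catalyst, so that predicate
    //  would not show up as a pushed filter in the scan.)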
    assert(filtered.count() === 1)
  }
}
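
A stricter follow-up (a sketch, not part of this commit) would assert that the predicate actually appears as a pushed filter in the physical plan instead of only checking the row count. The snippet below assumes the repo's HappyPandas.PandaInfo case class and a hypothetical Parquet copy of the panda data at /tmp/pandas.parquet; against a locally created DataFrame like the one in this test there is no external scan to push into, so the check only makes sense against a real source.

import org.apache.spark.sql.{Dataset, SparkSession}

import com.highperformancespark.examples.dataframe.HappyPandas.PandaInfo

object PushdownPlanCheck {
  // Render the physical plan as text so we can look for pushed filters.
  def physicalPlan[T](ds: Dataset[T]): String =
    ds.queryExecution.executedPlan.toString

  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .master("local[2]")
      .appName("pandaPlaceFilterPushdownCheck")
      .getOrCreate()
    import spark.implicits._

    // Hypothetical Parquet copy of the panda data; the path is an assumption.
    val pandas = spark.read.parquet("/tmp/pandas.parquet")

    // Same shape as the test: type the DataFrame, then filter with a column expression.
    val filtered = pandas.as[PandaInfo].filter($"place" === "a")

    // For a file/DataSourceV2 scan the pushed predicate is reported on the scan node,
    // e.g. "PushedFilters: [IsNotNull(place), EqualTo(place,a)]".
    val plan = physicalPlan(filtered)
    assert(plan.contains("PushedFilters") && plan.contains("place"),
      s"Expected the place filter to be pushed down, got:\n$plan")

    spark.stop()
  }
}

The exact PushedFilters text varies across Spark versions and sources, so matching on it is brittle; it is shown here only to illustrate where pushdown becomes visible in the plan.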
