-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-21368][SQL] TPCDSQueryBenchmark can't refer query files. #18592
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 4 commits
6da7419
e5669e4
14b188a
2022c45
7521b98
06e306f
d2d22d4
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -17,8 +17,6 @@ | |
|
|
||
| package org.apache.spark.sql.execution.benchmark | ||
|
|
||
| import java.io.File | ||
|
|
||
| import org.apache.spark.SparkConf | ||
| import org.apache.spark.sql.SparkSession | ||
| import org.apache.spark.sql.catalyst.TableIdentifier | ||
|
|
@@ -31,7 +29,7 @@ import org.apache.spark.util.Benchmark | |
| /** | ||
| * Benchmark to measure TPCDS query performance. | ||
| * To run this: | ||
| * spark-submit --class <this class> --jars <spark sql test jar> | ||
| * spark-submit --class <this class> <spark sql test jar> <TPCDS data location> | ||
| */ | ||
| object TPCDSQueryBenchmark { | ||
| val conf = | ||
|
|
@@ -61,12 +59,10 @@ object TPCDSQueryBenchmark { | |
| } | ||
|
|
||
| def tpcdsAll(dataLocation: String, queries: Seq[String]): Unit = { | ||
| require(dataLocation.nonEmpty, | ||
| "please modify the value of dataLocation to point to your local TPCDS data") | ||
| val tableSizes = setupTables(dataLocation) | ||
| queries.foreach { name => | ||
| val queryString = fileToString(new File(Thread.currentThread().getContextClassLoader | ||
| .getResource(s"tpcds/$name.sql").getFile)) | ||
| val queryString = resourceToString(s"tpcds/$name.sql", | ||
| classLoader = Thread.currentThread().getContextClassLoader) | ||
|
|
||
| // This is an indirect hack to estimate the size of each query's input by traversing the | ||
| // logical plan and adding up the sizes of all tables that appear in the plan. Note that this | ||
|
|
@@ -99,6 +95,20 @@ object TPCDSQueryBenchmark { | |
| } | ||
|
|
||
| def main(args: Array[String]): Unit = { | ||
| if (args.length < 1) { | ||
|
||
| // scalastyle:off | ||
| println( | ||
| s""" | ||
| |Usage: spark-submit --class <this class> <spark sql test jar> <TPCDS data location> | ||
| | | ||
| |In order to run this benchmark, please follow the instructions at | ||
| |https://github.com/databricks/spark-sql-perf/blob/master/README.md | ||
|
||
| |to generate the TPCDS data locally (preferably with a scale factor of 5 for benchmarking). | ||
| |Thereafter, the value of <TPCDS data location> needs to be set to the location where the generated data is stored. | ||
| """.stripMargin) | ||
| // scalastyle:on | ||
| System.exit(1) | ||
| } | ||
|
|
||
| // List of all TPC-DS queries | ||
| val tpcdsQueries = Seq( | ||
|
|
@@ -113,11 +123,7 @@ object TPCDSQueryBenchmark { | |
| "q81", "q82", "q83", "q84", "q85", "q86", "q87", "q88", "q89", "q90", | ||
| "q91", "q92", "q93", "q94", "q95", "q96", "q97", "q98", "q99") | ||
|
|
||
| // In order to run this benchmark, please follow the instructions at | ||
| // https://github.com/databricks/spark-sql-perf/blob/master/README.md to generate the TPCDS data | ||
| // locally (preferably with a scale factor of 5 for benchmarking). Thereafter, the value of | ||
| // dataLocation below needs to be set to the location where the generated data is stored. | ||
| val dataLocation = "" | ||
| val dataLocation = args(0) | ||
|
|
||
| tpcdsAll(dataLocation, queries = tpcdsQueries) | ||
| } | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please drop `import java.io.File`.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Dropped.