-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-21368][SQL] TPCDSQueryBenchmark can't refer query files. #18592
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 2 commits
6da7419
e5669e4
14b188a
2022c45
7521b98
06e306f
d2d22d4
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -65,8 +65,8 @@ object TPCDSQueryBenchmark { | |
| "please modify the value of dataLocation to point to your local TPCDS data") | ||
| val tableSizes = setupTables(dataLocation) | ||
| queries.foreach { name => | ||
| val queryString = fileToString(new File(Thread.currentThread().getContextClassLoader | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Please drop
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Dropped. |
||
| .getResource(s"tpcds/$name.sql").getFile)) | ||
| val queryString = resourceToString(s"tpcds/$name.sql", "UTF-8", | ||
| Thread.currentThread().getContextClassLoader) | ||
|
||
|
|
||
| // This is an indirect hack to estimate the size of each query's input by traversing the | ||
| // logical plan and adding up the sizes of all tables that appear in the plan. Note that this | ||
|
|
@@ -99,6 +99,13 @@ object TPCDSQueryBenchmark { | |
| } | ||
|
|
||
| def main(args: Array[String]): Unit = { | ||
| if (args.length < 1) { | ||
|
||
| // scalastyle:off println | ||
| println( | ||
| "Usage: spark-submit --class <this class> --jars <spark sql test jar> <data location>") | ||
|
||
| // scalastyle:on println | ||
| System.exit(1) | ||
| } | ||
|
|
||
| // List of all TPC-DS queries | ||
| val tpcdsQueries = Seq( | ||
|
|
@@ -117,7 +124,7 @@ object TPCDSQueryBenchmark { | |
| // https://github.com/databricks/spark-sql-perf/blob/master/README.md to generate the TPCDS data | ||
| // locally (preferably with a scale factor of 5 for benchmarking). Thereafter, the value of | ||
| // dataLocation below needs to be set to the location where the generated data is stored. | ||
| val dataLocation = "" | ||
| val dataLocation = args(0) | ||
|
|
||
| tpcdsAll(dataLocation, queries = tpcdsQueries) | ||
| } | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It seems we don't need this check.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, this is no longer needed.