Skip to content
Closed
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
package org.apache.spark.sql.execution.benchmark

import org.apache.spark.SparkConf
import org.apache.spark.internal.Logging
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation
Expand All @@ -29,9 +30,9 @@ import org.apache.spark.util.Benchmark
/**
* Benchmark to measure TPCDS query performance.
* To run this:
* spark-submit --class <this class> <spark sql test jar> <TPCDS data location>
* spark-submit --class <this class> <spark sql test jar> --data-location <TPCDS data location>
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This looks incorrect?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sorry, but I missed your point. what's correct?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

--data-location <TPCDS data location> [--query-filter Queries to filter]?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Aha, thanks. Would it be better to add the optional parameters here? I'd prefer to keep a simple example here.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK. I see.

*/
object TPCDSQueryBenchmark {
object TPCDSQueryBenchmark extends Logging {
val conf =
new SparkConf()
.setMaster("local[1]")
Expand Down Expand Up @@ -90,7 +91,9 @@ object TPCDSQueryBenchmark {
benchmark.addCase(name) { i =>
spark.sql(queryString).collect()
}
logInfo(s"\n\n===== TPCDS QUERY BENCHMARK OUTPUT FOR $name =====\n")
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

benchmark.run()
logInfo(s"\n\n===== FINISHED $name =====\n")
}
}

Expand All @@ -110,6 +113,20 @@ object TPCDSQueryBenchmark {
"q81", "q82", "q83", "q84", "q85", "q86", "q87", "q88", "q89", "q90",
"q91", "q92", "q93", "q94", "q95", "q96", "q97", "q98", "q99")

tpcdsAll(benchmarkArgs.dataLocation, queries = tpcdsQueries)
// If `--query-filter` is defined, run only the queries that this option selects
val queriesToRun = if (benchmarkArgs.queryFilter.nonEmpty) {
val queries = tpcdsQueries.filter { case queryName =>
benchmarkArgs.queryFilter.contains(queryName)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add case insensitive?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yea, I like the idea.

}
if (queries.isEmpty) {
throw new RuntimeException(
s"Empty queries to run. Bad query name filter: ${benchmarkArgs.queryFilter}")
}
queries
} else {
tpcdsQueries
}

tpcdsAll(benchmarkArgs.dataLocation, queries = queriesToRun)
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,12 @@

package org.apache.spark.sql.execution.benchmark

import java.util.Locale


class TPCDSQueryBenchmarkArguments(val args: Array[String]) {
var dataLocation: String = null
var queryFilter: Set[String] = Set.empty

parseArgs(args.toList)
validateArguments()
Expand All @@ -32,6 +36,10 @@ class TPCDSQueryBenchmarkArguments(val args: Array[String]) {
dataLocation = value
args = tail

case ("--query-filter") :: value :: tail =>
queryFilter = value.toLowerCase(Locale.ROOT).split(",").map(_.trim).toSet
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you also make "--data-location" case insensitive?

Copy link
Member Author

@maropu maropu Sep 13, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

updated

args = tail

case _ =>
// scalastyle:off println
System.err.println("Unknown/unsupported param " + args)
Expand All @@ -47,6 +55,7 @@ class TPCDSQueryBenchmarkArguments(val args: Array[String]) {
|Usage: spark-submit --class <this class> <spark sql test jar> [Options]
|Options:
| --data-location Path to TPCDS data
| --query-filter Queries to filter, e.g., q3,q5,q13
|
|------------------------------------------------------------------------------------------------------------------
|In order to run this benchmark, please follow the instructions at
Expand Down