Skip to content
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Added TPCDSQueryBenchmarkArguments.scala
  • Loading branch information
sarutak committed Sep 12, 2017
commit 06e306fdb4199a8c7850a6a370ce67aeac0cdf8e
Original file line number Diff line number Diff line change
Expand Up @@ -95,20 +95,7 @@ object TPCDSQueryBenchmark {
}

def main(args: Array[String]): Unit = {
if (args.length < 1) {
// scalastyle:off
println(
s"""
|Usage: spark-submit --class <this class> <spark sql test jar> <TPCDS data location>
|
|In order to run this benchmark, please follow the instructions at
|https://github.com/databricks/spark-sql-perf/blob/master/README.md
|to generate the TPCDS data locally (preferably with a scale factor of 5 for benchmarking).
|Thereafter, the value of <TPCDS data location> needs to be set to the location where the generated data is stored.
""".stripMargin)
// scalastyle:on
System.exit(1)
}
val benchmarkArgs = new TPCDSQueryBenchmarkArguments(args)

// List of all TPC-DS queries
val tpcdsQueries = Seq(
Expand All @@ -123,8 +110,6 @@ object TPCDSQueryBenchmark {
"q81", "q82", "q83", "q84", "q85", "q86", "q87", "q88", "q89", "q90",
"q91", "q92", "q93", "q94", "q95", "q96", "q97", "q98", "q99")

val dataLocation = args(0)

tpcdsAll(dataLocation, queries = tpcdsQueries)
tpcdsAll(benchmarkArgs.dataLocation, queries = tpcdsQueries)
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.spark.sql.execution.benchmark

/**
 * Command-line argument holder for the TPCDS query benchmark.
 *
 * Parsing and validation both run as part of construction. If an argument is
 * not recognised, or no data location was supplied, a usage message is written
 * to stderr and the JVM is terminated via `System.exit`.
 *
 * @param args raw command-line arguments, e.g. `Array("--data-location", "/path")`
 */
class TPCDSQueryBenchmarkArguments(val args: Array[String]) {
  // Remains null until `--data-location` is parsed; validateArguments() rejects null.
  var dataLocation: String = null

  parseArgs(args.toList)
  validateArguments()

  /**
   * Consumes `inputArgs` from the front, one recognised option at a time.
   * If an option is repeated, the last occurrence wins.
   */
  private def parseArgs(inputArgs: List[String]): Unit = {
    // Named `remaining` so it does not shadow the constructor member `args`.
    var remaining = inputArgs

    while (remaining.nonEmpty) {
      remaining match {
        case "--data-location" :: value :: tail =>
          dataLocation = value
          remaining = tail

        case "--data-location" :: Nil =>
          // The option is known but its value is missing; say so explicitly
          // instead of reporting it as an unknown parameter.
          // scalastyle:off println
          System.err.println("Missing value for --data-location")
          // scalastyle:on println
          printUsageAndExit(1)

        case _ =>
          // scalastyle:off println
          System.err.println("Unknown/unsupported param " + remaining)
          // scalastyle:on println
          printUsageAndExit(1)
      }
    }
  }

  /**
   * Writes the usage message to stderr and terminates the JVM.
   *
   * @param exitCode process exit status passed to `System.exit`
   */
  private def printUsageAndExit(exitCode: Int): Unit = {
    // All style checks are disabled here: the block uses println and contains
    // lines longer than the usual line-length limit.
    // scalastyle:off
    System.err.println("""
      |Usage: spark-submit --class <this class> <spark sql test jar> [Options]
      |Options:
      | --data-location Path to TPCDS data
      |
      |------------------------------------------------------------------------------------------------------------------
      |In order to run this benchmark, please follow the instructions at
      |https://github.com/databricks/spark-sql-perf/blob/master/README.md
      |to generate the TPCDS data locally (preferably with a scale factor of 5 for benchmarking).
      |Thereafter, the value of --data-location needs to be set to the location where the generated data is stored.
      |""".stripMargin)
    // scalastyle:on
    System.exit(exitCode)
  }

  /** Ensures every mandatory option was supplied; exits with usage otherwise. */
  private def validateArguments(): Unit = {
    if (dataLocation == null) {
      // scalastyle:off println
      System.err.println("Must specify a data location")
      // scalastyle:on println
      printUsageAndExit(-1)
    }
  }
}