diff --git a/bin/spark-class b/bin/spark-class
index e710e388be1b..7a45ffe001d2 100755
--- a/bin/spark-class
+++ b/bin/spark-class
@@ -43,14 +43,14 @@ else
   SPARK_JARS_DIR="${SPARK_HOME}/assembly/target/scala-$SPARK_SCALA_VERSION"
 fi
 
-if [ ! -d "$SPARK_JARS_DIR" ]; then
+if [ ! -d "$SPARK_JARS_DIR" ] && [ -z "$SPARK_TESTING" ]; then
   echo "Failed to find Spark jars directory ($SPARK_JARS_DIR)." 1>&2
   echo "You need to build Spark before running this program." 1>&2
   exit 1
+else
+  LAUNCH_CLASSPATH="$SPARK_JARS_DIR/*"
 fi
 
-LAUNCH_CLASSPATH="$SPARK_JARS_DIR/*"
-
 # Add the launcher build dir to the classpath if requested.
 if [ -n "$SPARK_PREPEND_CLASSES" ]; then
   LAUNCH_CLASSPATH="${SPARK_HOME}/launcher/target/scala-$SPARK_SCALA_VERSION/classes:$LAUNCH_CLASSPATH"
diff --git a/dev/run-tests.py b/dev/run-tests.py
index a1e6f1bdb560..a4a99e7ba3b2 100755
--- a/dev/run-tests.py
+++ b/dev/run-tests.py
@@ -349,16 +349,6 @@ def build_spark_sbt(hadoop_version):
     exec_sbt(profiles_and_goals)
 
 
-def build_spark_assembly_sbt(hadoop_version):
-    # Enable all of the profiles for the build:
-    build_profiles = get_hadoop_profiles(hadoop_version) + modules.root.build_profile_flags
-    sbt_goals = ["assembly/assembly"]
-    profiles_and_goals = build_profiles + sbt_goals
-    print("[info] Building Spark assembly (w/Hive 1.2.1) using SBT with these arguments: ",
-          " ".join(profiles_and_goals))
-    exec_sbt(profiles_and_goals)
-
-
 def build_apache_spark(build_tool, hadoop_version):
     """Will build Spark against Hive v1.2.1 given the passed in build tool (either `sbt` or
     `maven`). Defaults to using `sbt`."""
@@ -574,9 +564,6 @@ def main():
     if build_tool == "sbt":
         # Note: compatibility tests only supported in sbt for now
         detect_binary_inop_with_mima()
-        # Since we did not build assembly/assembly before running dev/mima, we need to
-        # do it here because the tests still rely on it; see SPARK-13294 for details.
-        build_spark_assembly_sbt(hadoop_version)
 
     # run the test suites
     run_scala_tests(build_tool, hadoop_version, test_modules, excluded_tags)
diff --git a/launcher/src/main/java/org/apache/spark/launcher/AbstractCommandBuilder.java b/launcher/src/main/java/org/apache/spark/launcher/AbstractCommandBuilder.java
index f6c7e07654ee..374bc70dd9db 100644
--- a/launcher/src/main/java/org/apache/spark/launcher/AbstractCommandBuilder.java
+++ b/launcher/src/main/java/org/apache/spark/launcher/AbstractCommandBuilder.java
@@ -144,10 +144,27 @@ List<String> buildClassPath(String appClassPath) throws IOException {
     boolean isTesting = "1".equals(getenv("SPARK_TESTING"));
     if (prependClasses || isTesting) {
       String scala = getScalaVersion();
-      List<String> projects = Arrays.asList("core", "repl", "mllib", "graphx",
-        "streaming", "tools", "sql/catalyst", "sql/core", "sql/hive", "sql/hive-thriftserver",
-        "yarn", "launcher",
-        "common/network-common", "common/network-shuffle", "common/network-yarn");
+      List<String> projects = Arrays.asList(
+        "common/network-common",
+        "common/network-shuffle",
+        "common/network-yarn",
+        "common/sketch",
+        "common/tags",
+        "common/unsafe",
+        "core",
+        "examples",
+        "graphx",
+        "launcher",
+        "mllib",
+        "repl",
+        "sql/catalyst",
+        "sql/core",
+        "sql/hive",
+        "sql/hive-thriftserver",
+        "streaming",
+        "tools",
+        "yarn"
+      );
       if (prependClasses) {
         if (!isTesting) {
           System.err.println(
diff --git a/python/run-tests.py b/python/run-tests.py
index a9f8854e6f66..25cc859ed6a9 100755
--- a/python/run-tests.py
+++ b/python/run-tests.py
@@ -54,10 +54,27 @@ def print_red(text):
 LOGGER = logging.getLogger()
 
 
-def run_individual_python_test(test_name, pyspark_python):
+def get_spark_dist_classpath():
+    original_working_dir = os.getcwd()
+    os.chdir(SPARK_HOME)
+    cp = subprocess_check_output(
+        ["./build/sbt", "-Phive", "export assembly/managedClasspath"], universal_newlines=True)
+    cp = cp.strip().split("\n")[-1]
+    os.chdir(original_working_dir)
+    return cp
+
+
+def run_individual_python_test(test_name, pyspark_python, spark_dist_classpath):
     env = dict(os.environ)
-    env.update({'SPARK_TESTING': '1', 'PYSPARK_PYTHON': which(pyspark_python),
-                'PYSPARK_DRIVER_PYTHON': which(pyspark_python)})
+    env.update({
+        # Setting SPARK_DIST_CLASSPATH is a simple way to make sure that any child processes
+        # launched by the tests have access to the correct test-time classpath.
+        'SPARK_DIST_CLASSPATH': spark_dist_classpath,
+        'SPARK_TESTING': '1',
+        'SPARK_PREPEND_CLASSES': '1',
+        'PYSPARK_PYTHON': which(pyspark_python),
+        'PYSPARK_DRIVER_PYTHON': which(pyspark_python),
+    })
     LOGGER.debug("Starting test(%s): %s", pyspark_python, test_name)
     start_time = time.time()
     try:
@@ -175,6 +192,8 @@ def main():
                 priority = 100
             task_queue.put((priority, (python_exec, test_goal)))
 
+    spark_dist_classpath = get_spark_dist_classpath()
+
     def process_queue(task_queue):
         while True:
             try:
@@ -182,7 +201,7 @@ def process_queue(task_queue):
             except Queue.Empty:
                 break
             try:
-                run_individual_python_test(test_goal, python_exec)
+                run_individual_python_test(test_goal, python_exec, spark_dist_classpath)
             finally:
                 task_queue.task_done()
 
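
Note on the test-classpath change above: the following is a minimal standalone sketch, not part of the patch, of how the value produced by get_spark_dist_classpath() is meant to be consumed. The helper name resolve_dist_classpath and the __main__ wiring are illustrative assumptions for this sketch only; the patch itself uses the script's subprocess_check_output wrapper and passes the classpath through the test runner's environment. It assumes a Spark source checkout with the sbt launcher available under ./build/sbt.

    # Illustrative sketch only (not part of the patch): resolve the sbt-managed
    # classpath the same way get_spark_dist_classpath() does, and export it so
    # that the patched bin/spark-class can run without pre-built assembly jars.
    import os
    import subprocess


    def resolve_dist_classpath(spark_home):
        # sbt's "export <task>" prints the resolved value of that task; for
        # managedClasspath the last line of output is the classpath string.
        out = subprocess.check_output(
            ["./build/sbt", "-Phive", "export assembly/managedClasspath"],
            cwd=spark_home, universal_newlines=True)
        return out.strip().split("\n")[-1]


    if __name__ == "__main__":
        spark_home = os.environ.get("SPARK_HOME", os.getcwd())
        cp = resolve_dist_classpath(spark_home)
        # Exporting SPARK_DIST_CLASSPATH (together with SPARK_TESTING and
        # SPARK_PREPEND_CLASSES, as the patched run-tests.py does) lets
        # bin/spark-class and the launcher pick up these jars even though no
        # assembly jars directory has been built.
        os.environ["SPARK_DIST_CLASSPATH"] = cp
        os.environ["SPARK_TESTING"] = "1"
        os.environ["SPARK_PREPEND_CLASSES"] = "1"
        print(cp)

The sketch only resolves and exports the classpath; any process launched afterwards with this environment (for example a PySpark test, as in run_individual_python_test above) would inherit it.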