Remove assembly in tests.
JoshRosen committed Mar 14, 2016
commit 2c101932a7b94ff8b4aa14b09bea6728da4a4bdd
dev/run-tests.py (1 addition, 14 deletions)
@@ -323,7 +323,7 @@ def get_hadoop_profiles(hadoop_version):
 def build_spark_maven(hadoop_version):
     # Enable all of the profiles for the build:
     build_profiles = get_hadoop_profiles(hadoop_version) + modules.root.build_profile_flags
-    mvn_goals = ["clean", "package", "-DskipTests"]
+    mvn_goals = ["clean", "package", "-DskipTests", "-pl", "!assembly"]
A contributor commented:
If you're looking to speed up the build, building and testing in one shot might be a good thing to do, at least for Maven. Using -fae would let the build go as far as it can when something fails.

No need to do that in this change, though.
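For illustration, a minimal sketch (not part of this change) of what that suggestion could look like in dev/run-tests.py, reusing the script's existing get_hadoop_profiles, modules.root.build_profile_flags, and exec_maven helpers; the function name and exact goal list here are hypothetical:

def build_and_test_spark_maven(hadoop_version):
    # Hypothetical variant: build and run tests in a single Maven invocation,
    # still skipping the assembly module and using -fae (fail-at-end) so the
    # build keeps going as far as it can after a failure.
    build_profiles = get_hadoop_profiles(hadoop_version) + modules.root.build_profile_flags
    mvn_goals = ["clean", "install", "-fae", "-pl", "!assembly"]
    profiles_and_goals = build_profiles + mvn_goals
    print("[info] Building and testing Spark using Maven with these arguments: ",
          " ".join(profiles_and_goals))
    exec_maven(profiles_and_goals)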

     profiles_and_goals = build_profiles + mvn_goals

     print("[info] Building Spark (w/Hive 1.2.1) using Maven with these arguments: ",
@@ -349,16 +349,6 @@ def build_spark_sbt(hadoop_version):
     exec_sbt(profiles_and_goals)


-def build_spark_assembly_sbt(hadoop_version):
-    # Enable all of the profiles for the build:
-    build_profiles = get_hadoop_profiles(hadoop_version) + modules.root.build_profile_flags
-    sbt_goals = ["assembly/assembly"]
-    profiles_and_goals = build_profiles + sbt_goals
-    print("[info] Building Spark assembly (w/Hive 1.2.1) using SBT with these arguments: ",
-          " ".join(profiles_and_goals))
-    exec_sbt(profiles_and_goals)
-
-
 def build_apache_spark(build_tool, hadoop_version):
     """Will build Spark against Hive v1.2.1 given the passed in build tool (either `sbt` or
     `maven`). Defaults to using `sbt`."""
@@ -574,9 +564,6 @@ def main():
     if build_tool == "sbt":
         # Note: compatibility tests only supported in sbt for now
         detect_binary_inop_with_mima()
-        # Since we did not build assembly/assembly before running dev/mima, we need to
-        # do it here because the tests still rely on it; see SPARK-13294 for details.
-        build_spark_assembly_sbt(hadoop_version)

     # run the test suites
     run_scala_tests(build_tool, hadoop_version, test_modules, excluded_tags)
@@ -144,10 +144,38 @@ List<String> buildClassPath(String appClassPath) throws IOException {
     boolean isTesting = "1".equals(getenv("SPARK_TESTING"));
     if (prependClasses || isTesting) {
       String scala = getScalaVersion();
-      List<String> projects = Arrays.asList("core", "repl", "mllib", "graphx",
-        "streaming", "tools", "sql/catalyst", "sql/core", "sql/hive", "sql/hive-thriftserver",
-        "yarn", "launcher",
-        "common/network-common", "common/network-shuffle", "common/network-yarn");
+      // All projects except assemblies:
+      List<String> projects = Arrays.asList(
+        "common/network-common",
+        "common/network-shuffle",
+        "common/network-yarn",
+        "common/sketch",
+        "common/tags",
+        "common/unsafe",
+        "core",
+        "examples",
+        "external/akka",
+        "external/docker-integration-tests",
+        "external/flume",
+        "external/flume-sink",
+        "external/kafka",
+        "external/kinesis-asl",
+        "external/mqtt",
+        "external/spark-ganglia-lgpl",
+        "external/twitter",
+        "external/zeromq",
+        "graphx",
+        "launcher",
+        "mllib",
+        "repl",
+        "sql/catalyst",
+        "sql/core",
+        "sql/hive",
+        "sql/hive-thriftserver",
+        "streaming",
+        "tools",
+        "yarn"
+      );
       if (prependClasses) {
         if (!isTesting) {
           System.err.println(
python/run-tests.py (23 additions, 4 deletions)
@@ -54,10 +54,27 @@ def print_red(text):
 LOGGER = logging.getLogger()


-def run_individual_python_test(test_name, pyspark_python):
+def get_spark_dist_classpath():
+    original_working_dir = os.getcwd()
+    os.chdir(SPARK_HOME)
+    cp = subprocess_check_output(
+        ["./build/sbt", "export assembly/managedClasspath"], universal_newlines=True)
+    cp = cp.strip().split("\n")[-1]
+    os.chdir(original_working_dir)
+    return cp
+
+
+def run_individual_python_test(test_name, pyspark_python, spark_dist_classpath):
     env = dict(os.environ)
-    env.update({'SPARK_TESTING': '1', 'PYSPARK_PYTHON': which(pyspark_python),
-                'PYSPARK_DRIVER_PYTHON': which(pyspark_python)})
+    env.update({
+        # Setting SPARK_DIST_CLASSPATH is a simple way to make sure that any child processes
+        # launched by the tests have access to the correct test-time classpath.
+        'SPARK_DIST_CLASSPATH': spark_dist_classpath,
+        'SPARK_TESTING': '1',
+        'SPARK_PREPEND_CLASSES': '1',
+        'PYSPARK_PYTHON': which(pyspark_python),
+        'PYSPARK_DRIVER_PYTHON': which(pyspark_python),
+    })
     LOGGER.debug("Starting test(%s): %s", pyspark_python, test_name)
     start_time = time.time()
     try:
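As an aside (not part of this diff), the managedClasspath lookup above could avoid mutating the process working directory by passing cwd to subprocess directly; a minimal sketch, with the function name made up for illustration:

import subprocess

def get_spark_dist_classpath_via_cwd(spark_home):
    # Ask sbt to export the assembly module's managed classpath; the classpath
    # string is the last line of sbt's output.
    output = subprocess.check_output(
        ["./build/sbt", "export assembly/managedClasspath"],
        cwd=spark_home, universal_newlines=True)
    return output.strip().split("\n")[-1]

# e.g. get_spark_dist_classpath_via_cwd(SPARK_HOME)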
@@ -175,14 +192,16 @@ def main():
                 priority = 100
             task_queue.put((priority, (python_exec, test_goal)))

+    spark_dist_classpath = get_spark_dist_classpath()
+
     def process_queue(task_queue):
         while True:
             try:
                 (priority, (python_exec, test_goal)) = task_queue.get_nowait()
             except Queue.Empty:
                 break
             try:
-                run_individual_python_test(test_goal, python_exec)
+                run_individual_python_test(test_goal, python_exec, spark_dist_classpath)
             finally:
                 task_queue.task_done()

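Finally, a generic illustration (not code from this PR) of how the environment assembled in run_individual_python_test is consumed: it is passed to the subprocess that runs each test, so bin/pyspark and any JVM it launches see SPARK_DIST_CLASSPATH, SPARK_PREPEND_CLASSES, and SPARK_TESTING. The classpath value and test module below are placeholders:

import os
import subprocess

spark_home = os.environ.get("SPARK_HOME", ".")
env = dict(os.environ)
env.update({
    # Placeholder classpath; in the real script this comes from get_spark_dist_classpath().
    'SPARK_DIST_CLASSPATH': '/path/to/managed/classpath/jars/*',
    'SPARK_TESTING': '1',
    'SPARK_PREPEND_CLASSES': '1',
})

# Run one pyspark test module with that environment.
retcode = subprocess.call(
    [os.path.join(spark_home, "bin/pyspark"), "pyspark.tests"],
    env=env)
print("exit code:", retcode)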