Skip to content
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
1bdeb87
Move module definitions to separate file.
JoshRosen Jun 22, 2015
311c6a9
Move shell utility functions to own module.
JoshRosen Jun 22, 2015
32660fc
Initial cut at Python test runner refactoring
JoshRosen Jun 23, 2015
dcc9c09
Fix time division
JoshRosen Jun 23, 2015
4c97136
PYTHONPATH fixes
JoshRosen Jun 23, 2015
04015b9
First attempt at getting PySpark Kafka test to work in new runner script
JoshRosen Jun 24, 2015
aec0b8f
Actually get the Kafka stuff to run properly
JoshRosen Jun 24, 2015
def2d8a
Two minor fixes
JoshRosen Jun 24, 2015
d6a77d3
Fix the tests of dev/run-tests
JoshRosen Jun 24, 2015
caeb040
Fixes to PySpark test module definitions
JoshRosen Jun 24, 2015
b2ab027
Add command-line options for running individual suites in python/run-…
JoshRosen Jun 24, 2015
2efd594
Update dev/run-tests to use new Python test runner flags
JoshRosen Jun 24, 2015
fff4d09
Add dev/sparktestsupport to pep8 checks
JoshRosen Jun 24, 2015
f542ac5
Fix lint check for Python 3
JoshRosen Jun 24, 2015
8f3244c
Use universal_newlines to fix dev/run-tests doctest failures on Pytho…
JoshRosen Jun 25, 2015
4f8902c
Python lint fixes.
JoshRosen Jun 25, 2015
d33e525
Merge remote-tracking branch 'origin/master' into run-tests-python-mo…
JoshRosen Jun 25, 2015
9c80469
Fix passing of PYSPARK_PYTHON
JoshRosen Jun 25, 2015
f53db55
Remove python2 flag, since the test runner script also works fine und…
JoshRosen Jun 26, 2015
3b852ae
Fall back to PYSPARK_PYTHON when sys.executable is None (fixes a test)
JoshRosen Jun 26, 2015
568a3fd
Fix hashbang
JoshRosen Jun 26, 2015
c364ccf
Use which() to convert PYSPARK_PYTHON to an absolute path before shel…
JoshRosen Jun 26, 2015
27a389f
Skip MLLib tests for PyPy
JoshRosen Jun 26, 2015
37aff00
Python 3 fix
JoshRosen Jun 26, 2015
8f65ed0
Fix handling of module in python/run-tests
JoshRosen Jun 26, 2015
34c98d2
Fix universal_newlines for Python 3
JoshRosen Jun 27, 2015
8233d61
Add python/run-tests.py to Python lint checks
JoshRosen Jun 27, 2015
f578d6d
Fix print for Python 2.x
JoshRosen Jun 27, 2015
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
First attempt at getting PySpark Kafka test to work in new runner script
  • Loading branch information
JoshRosen committed Jun 24, 2015
commit 04015b9dd5d2a2f47c1ca48408bca8949709ab01
31 changes: 31 additions & 0 deletions python/pyspark/streaming/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
# limitations under the License.
#

import glob
import os
import sys
from itertools import chain
Expand Down Expand Up @@ -575,6 +576,36 @@ def check_output(n):
class KafkaStreamTests(PySparkStreamingTestCase):
timeout = 20 # seconds
duration = 1
old_pyspark_submit_args = None

@classmethod
def setUpClass(cls):
    """Point PYSPARK_SUBMIT_ARGS at the Kafka assembly jar before starting Spark.

    Saves the caller's PYSPARK_SUBMIT_ARGS (restored in tearDownClass), locates
    exactly one spark-streaming-kafka-assembly jar under SPARK_HOME, and raises
    if zero or multiple jars are found.
    """
    # Remember the pre-test value so tearDownClass can restore it; None
    # means the variable was not set at all.
    cls.old_pyspark_submit_args = os.environ.get("PYSPARK_SUBMIT_ARGS")
    spark_home = os.environ["SPARK_HOME"]
    assembly_dir = os.path.join(spark_home, "external/kafka-assembly")
    jar_pattern = os.path.join(
        assembly_dir, "target/scala-*/spark-streaming-kafka-assembly-*.jar")
    found_jars = glob.glob(jar_pattern)
    # Guard clauses: the build must have produced exactly one assembly jar.
    if not found_jars:
        raise Exception(
            ("Failed to find Spark Streaming kafka assembly jar in %s. " % assembly_dir) +
            "You need to build Spark with "
            "'build/sbt assembly/assembly streaming-kafka-assembly/assembly' or "
            "'build/mvn package' before running this test")
    if len(found_jars) > 1:
        raise Exception(("Found multiple Spark Streaming Kafka assembly JARs in %s; please "
                         "remove all but one") % assembly_dir)
    os.environ["PYSPARK_SUBMIT_ARGS"] = "--jars %s pyspark-shell" % found_jars[0]
    print(os.environ["PYSPARK_SUBMIT_ARGS"])
    super(KafkaStreamTests, cls).setUpClass()

@classmethod
def tearDownClass(cls):
    """Restore PYSPARK_SUBMIT_ARGS to its pre-setUpClass state, then tear down Spark."""
    saved = cls.old_pyspark_submit_args
    if saved is not None:
        os.environ["PYSPARK_SUBMIT_ARGS"] = saved
    else:
        # Variable did not exist before setUpClass ran, so remove it entirely.
        del os.environ["PYSPARK_SUBMIT_ARGS"]
    super(KafkaStreamTests, cls).tearDownClass()

def setUp(self):
super(KafkaStreamTests, self).setUp()
Expand Down
79 changes: 0 additions & 79 deletions python/run-tests
Original file line number Diff line number Diff line change
Expand Up @@ -24,82 +24,3 @@ cd "$FWDIR"
# Hand control over to the new Python-based runner; exec replaces this shell,
# so the trailing `exit` is only a safeguard and is normally unreachable.
exec python -u ./python/run-tests.py

exit


# Track whether any suite failed; set non-zero by the run_test helpers.
FAILED=0
# All suite output is appended to this log file.
LOG_FILE=unit-tests.log
# Wall-clock start time (epoch seconds) for the summary at the end.
START=$(date +"%s")

# Start from a clean log for this run.
rm -f $LOG_FILE

# Run the PySpark streaming suites. Locates the single Kafka assembly jar
# built for the current Scala version and exposes it to spark-submit via
# PYSPARK_SUBMIT_ARGS before invoking the test modules.
function run_streaming_tests() {
echo "Run streaming tests ..."

KAFKA_ASSEMBLY_DIR="$FWDIR"/external/kafka-assembly
JAR_PATH="${KAFKA_ASSEMBLY_DIR}/target/scala-${SPARK_SCALA_VERSION}"
# The glob should match exactly one jar; if it matches nothing the literal
# pattern is passed through and the -e test fails, producing the error below.
for f in "${JAR_PATH}"/spark-streaming-kafka-assembly-*.jar; do
if [[ ! -e "$f" ]]; then
echo "Failed to find Spark Streaming Kafka assembly jar in $KAFKA_ASSEMBLY_DIR" 1>&2
echo "You need to build Spark with " \
"'build/sbt assembly/assembly streaming-kafka-assembly/assembly' or" \
"'build/mvn package' before running this program" 1>&2
exit 1
fi
KAFKA_ASSEMBLY_JAR="$f"
done

# Make the jar available to the pyspark-shell launched by the test modules.
export PYSPARK_SUBMIT_ARGS="--jars ${KAFKA_ASSEMBLY_JAR} pyspark-shell"
run_test "pyspark.streaming.util"
run_test "pyspark.streaming.tests"
}

echo "Running PySpark tests. Output is in python/$LOG_FILE."

# Default interpreter; overridden below when specific versions are present.
export PYSPARK_PYTHON="python"

# Try to test with Python 2.6, since that's the minimum version that we support:
if [ $(which python2.6) ]; then
export PYSPARK_PYTHON="python2.6"
fi

echo "Testing with Python version:"
$PYSPARK_PYTHON --version

# Full suite run under the selected Python 2 interpreter.
# (run_core_tests and friends are defined earlier in this script.)
run_core_tests
run_sql_tests
run_mllib_tests
run_ml_tests
run_streaming_tests

# Try to test with Python 3
if [ $(which python3.4) ]; then
export PYSPARK_PYTHON="python3.4"
echo "Testing with Python3.4 version:"
$PYSPARK_PYTHON --version

run_core_tests
run_sql_tests
run_mllib_tests
run_ml_tests
run_streaming_tests
fi

# Try to test with PyPy
# NOTE: the MLlib/ML suites are deliberately skipped under PyPy.
if [ $(which pypy) ]; then
export PYSPARK_PYTHON="pypy"
echo "Testing with PyPy version:"
$PYSPARK_PYTHON --version

run_core_tests
run_sql_tests
run_streaming_tests
fi

# Print a green success banner with elapsed wall-clock time if nothing failed.
if [[ $FAILED == 0 ]]; then
now=$(date +"%s")
echo -e "\033[32mTests passed \033[0min $(($now - $START)) seconds"
fi

# TODO: in the long-run, it would be nice to use a test runner like `nose`.
# The doctest fixtures are the current barrier to doing this.
1 change: 1 addition & 0 deletions python/run-tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ def run_individual_python_test(test_name, pyspark_python=None):

def main():
# TODO: do we need to remove the metastore and warehouse created by the SQL tests? Ask Ahir.
print("Running PySpark tests. Output is in python/%s" % LOG_FILE)
if os.path.exists(LOG_FILE):
os.remove(LOG_FILE)
python_execs = [x for x in ["python2.6", "python3.4", "pypy"] if which(x)]
Expand Down