diff --git a/python/run-tests b/python/run-tests
index 4468fdb3f267..da6d37a1cd8e 100755
--- a/python/run-tests
+++ b/python/run-tests
@@ -17,6 +17,65 @@
 # limitations under the License.
 #
+
+# Run all test suites or individual test suites.
+#
+# Usage: run-tests [-v python version] [core|sql|mllib|ml|streaming]
+#
+# When no test suite is selected, all test suites are run. Multiple
+# test suites may also be selected.
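+#
+# Examples (illustrative invocations, derived from the option parsing below;
+# run from the python/ directory):
+#   ./run-tests                  # every suite, once per supported Python found
+#   ./run-tests sql              # only the sql suite
+#   ./run-tests -v 2.6 core sql  # core and sql suites on Python 2.6 only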
- run_test "pyspark.ml.feature" - run_test "pyspark.ml.classification" - run_test "pyspark.ml.recommendation" - run_test "pyspark.ml.regression" - run_test "pyspark.ml.tuning" - run_test "pyspark.ml.tests" - run_test "pyspark.ml.evaluation" -} - -function run_streaming_tests() { - echo "Run streaming tests ..." - - KAFKA_ASSEMBLY_DIR="$FWDIR"/external/kafka-assembly - JAR_PATH="${KAFKA_ASSEMBLY_DIR}/target/scala-${SPARK_SCALA_VERSION}" - for f in "${JAR_PATH}"/spark-streaming-kafka-assembly-*.jar; do - if [[ ! -e "$f" ]]; then - echo "Failed to find Spark Streaming Kafka assembly jar in $KAFKA_ASSEMBLY_DIR" 1>&2 - echo "You need to build Spark with " \ - "'build/sbt assembly/assembly streaming-kafka-assembly/assembly' or" \ - "'build/mvn package' before running this program" 1>&2 - exit 1 - fi - KAFKA_ASSEMBLY_JAR="$f" - done - - export PYSPARK_SUBMIT_ARGS="--jars ${KAFKA_ASSEMBLY_JAR} pyspark-shell" - run_test "pyspark.streaming.util" - run_test "pyspark.streaming.tests" -} - echo "Running PySpark tests. Output is in python/$LOG_FILE." -export PYSPARK_PYTHON="python" - -# Try to test with Python 2.6, since that's the minimum version that we support: -if [ $(which python2.6) ]; then - export PYSPARK_PYTHON="python2.6" -fi +if [ -n "$TARGET_PYTHON_VERSION" ]; then + if [[ ! "$SUPPORT_PYTHON_VERSIONS" =~ "$TARGET_PYTHON_VERSION" ]]; then + echo "python $TARGET_PYTHON_VERSION is not supported." + exit 1 + fi -echo "Testing with Python version:" -$PYSPARK_PYTHON --version - -run_core_tests -run_sql_tests -run_mllib_tests -run_ml_tests -run_streaming_tests - -# Try to test with Python 3 -if [ $(which python3.4) ]; then - export PYSPARK_PYTHON="python3.4" - echo "Testing with Python3.4 version:" - $PYSPARK_PYTHON --version - - run_core_tests - run_sql_tests - run_mllib_tests - run_ml_tests - run_streaming_tests + SUPPORT_PYTHON_VERSIONS=$TARGET_PYTHON_VERSION fi -# Try to test with PyPy -if [ $(which pypy) ]; then - export PYSPARK_PYTHON="pypy" - echo "Testing with PyPy version:" - $PYSPARK_PYTHON --version +for PYVERSION in $SUPPORT_PYTHON_VERSIONS; +do + # Remove the metastore and warehouse directory created by the HiveContext tests in Spark SQL + rm -rf metastore warehouse - run_core_tests - run_sql_tests - run_streaming_tests -fi + START=$(date +"%s") + FAILED=0 -if [[ $FAILED == 0 ]]; then - now=$(date +"%s") - echo -e "\033[32mTests passed \033[0min $(($now - $START)) seconds" -fi + if [ -f "${SCRIPT_DIR}/tests/$PYVERSION.sh" ]; then + . "${SCRIPT_DIR}/tests/$PYVERSION.sh" + else + . "${SCRIPT_DIR}/tests/default" $PYVERSION + fi -# TODO: in the long-run, it would be nice to use a test runner like `nose`. -# The doctest fixtures are the current barrier to doing this. + if [[ $FAILED == 0 ]]; then + now=$(date +"%s") + echo -e "\033[32mTests passed \033[0min $(($now - $START)) seconds" + fi +done diff --git a/python/tests/common b/python/tests/common new file mode 100644 index 000000000000..6f5000d59eda --- /dev/null +++ b/python/tests/common @@ -0,0 +1,132 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
+function run_core_tests() {
+    if [ $DO_CORE_TESTS == 0 ]; then
+        return 0
+    fi
+
+    echo "Run core tests ..."
+    run_test "pyspark.rdd"
+    run_test "pyspark.context"
+    run_test "pyspark.conf"
+    run_test "pyspark.broadcast"
+    run_test "pyspark.accumulators"
+    run_test "pyspark.serializers"
+    run_test "pyspark.profiler"
+    run_test "pyspark.shuffle"
+    run_test "pyspark.tests"
+}
+
+function run_sql_tests() {
+    if [ $DO_SQL_TESTS == 0 ]; then
+        return 0
+    fi
+
+    echo "Run sql tests ..."
+    run_test "pyspark.sql.types"
+    run_test "pyspark.sql.context"
+    run_test "pyspark.sql.column"
+    run_test "pyspark.sql.dataframe"
+    run_test "pyspark.sql.group"
+    run_test "pyspark.sql.functions"
+    run_test "pyspark.sql.readwriter"
+    run_test "pyspark.sql.window"
+    run_test "pyspark.sql.tests"
+}
+
+function run_mllib_tests() {
+    if [ $DO_MLLIB_TESTS == 0 ]; then
+        return 0
+    fi
+
+    echo "Run mllib tests ..."
+    run_test "pyspark.mllib.classification"
+    run_test "pyspark.mllib.clustering"
+    run_test "pyspark.mllib.evaluation"
+    run_test "pyspark.mllib.feature"
+    run_test "pyspark.mllib.fpm"
+    run_test "pyspark.mllib.linalg"
+    run_test "pyspark.mllib.random"
+    run_test "pyspark.mllib.recommendation"
+    run_test "pyspark.mllib.regression"
+    run_test "pyspark.mllib.stat._statistics"
+    run_test "pyspark.mllib.stat.KernelDensity"
+    run_test "pyspark.mllib.tree"
+    run_test "pyspark.mllib.util"
+    run_test "pyspark.mllib.tests"
+}
+
+function run_ml_tests() {
+    if [ $DO_ML_TESTS == 0 ]; then
+        return 0
+    fi
+
+    echo "Run ml tests ..."
+    run_test "pyspark.ml.feature"
+    run_test "pyspark.ml.classification"
+    run_test "pyspark.ml.recommendation"
+    run_test "pyspark.ml.regression"
+    run_test "pyspark.ml.tuning"
+    run_test "pyspark.ml.tests"
+    run_test "pyspark.ml.evaluation"
+}
+
+function run_streaming_tests() {
+    if [ $DO_STREAMING_TESTS == 0 ]; then
+        return 0
+    fi
+
+    echo "Run streaming tests ..."
+
+    KAFKA_ASSEMBLY_DIR="$FWDIR"/external/kafka-assembly
+    JAR_PATH="${KAFKA_ASSEMBLY_DIR}/target/scala-${SPARK_SCALA_VERSION}"
+    for f in "${JAR_PATH}"/spark-streaming-kafka-assembly-*.jar; do
+        if [[ ! -e "$f" ]]; then
+            echo "Failed to find Spark Streaming Kafka assembly jar in $KAFKA_ASSEMBLY_DIR" 1>&2
+            echo "You need to build Spark with " \
+                "'build/sbt assembly/assembly streaming-kafka-assembly/assembly' or" \
+                "'build/mvn package' before running this program" 1>&2
+            exit 1
+        fi
+        KAFKA_ASSEMBLY_JAR="$f"
+    done
+
+    export PYSPARK_SUBMIT_ARGS="--jars ${KAFKA_ASSEMBLY_JAR} pyspark-shell"
+    run_test "pyspark.streaming.util"
+    run_test "pyspark.streaming.tests"
+}
diff --git a/python/tests/default b/python/tests/default
new file mode 100644
index 000000000000..79eb0748405a
--- /dev/null
+++ b/python/tests/default
@@ -0,0 +1,37 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+. tests/common
+
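+# run-tests sources tests/<version>.sh when that version needs special
+# handling (see tests/pypy.sh, which runs a reduced suite); otherwise it
+# sources this default file with the requested version number as $1.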
-e "$f" ]]; then + echo "Failed to find Spark Streaming Kafka assembly jar in $KAFKA_ASSEMBLY_DIR" 1>&2 + echo "You need to build Spark with " \ + "'build/sbt assembly/assembly streaming-kafka-assembly/assembly' or" \ + "'build/mvn package' before running this program" 1>&2 + exit 1 + fi + KAFKA_ASSEMBLY_JAR="$f" + done + + export PYSPARK_SUBMIT_ARGS="--jars ${KAFKA_ASSEMBLY_JAR} pyspark-shell" + run_test "pyspark.streaming.util" + run_test "pyspark.streaming.tests" +} diff --git a/python/tests/default b/python/tests/default new file mode 100644 index 000000000000..79eb0748405a --- /dev/null +++ b/python/tests/default @@ -0,0 +1,37 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +. tests/common + +PYVERSION=$1 +PYBIN_NAME="python${PYVERSION}" + +# Try to test with particular version of Python, since that's the minimum version that we support: +if [ $(which $PYBIN_NAME) ]; then + export PYSPARK_PYTHON=$PYBIN_NAME + echo "Testing with Python version:" + $PYSPARK_PYTHON --version + + run_core_tests + run_sql_tests + run_mllib_tests + run_ml_tests + run_streaming_tests +else + echo "Skipping tests with Python version: $PYVERSION" + FAILED=1 +fi diff --git a/python/tests/pypy.sh b/python/tests/pypy.sh new file mode 100644 index 000000000000..35ee1857cc0f --- /dev/null +++ b/python/tests/pypy.sh @@ -0,0 +1,32 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +. tests/common + +# Try to test with PyPy +if [ $(which pypy) ]; then + export PYSPARK_PYTHON="pypy" + echo "Testing with PyPy version:" + $PYSPARK_PYTHON --version + + run_core_tests + run_sql_tests + run_streaming_tests +else + echo "Skipping tests with PyPy" + FAILED=1 +fi