apache · potix2 · Jan 29, 2015 · Feb 23, 2015 · Jun 17, 2015 · Jun 19, 2015
diff --git a/python/run-tests b/python/run-tests
@@ -17,6 +17,65 @@
 # limitations under the License.
 #
 
+# Run all test suites, or only the individual test suites that are selected.
+#
+# Usage: run-tests [-v python version] [core|sql|mllib|ml|streaming]
+#
+# When you select none, all test suites are run. You can also select
+# multiple test suites.
+
+function usage() {  # Print the one-line command synopsis on stdout.
+    printf '%s\n' "Usage: run-tests [-v python version] [core|sql|mllib|ml|streaming]"
+}
+
+SUPPORT_PYTHON_VERSIONS="2.6 3.4 pypy"   # versions looped over when -v is absent
+TARGET_PYTHON_VERSION=                   # set only by "-v <version>"
+if [[ $# -ne 0 && "$1" == "-v" ]]; then  # [[ ]] copes with an empty or missing $1
+    TARGET_PYTHON_VERSION=$2
+    shift 2
+fi
+
+# Work out which suites run. Each DO_*_TESTS flag ends up as 1 (run) or
+# 0 (skip); the flags are the only output of this section besides a usage
+# error exit.
+if [[ $# -gt 0 ]]; then
+    # Suites were named on the command line: switch everything off first;
+    # the loop below switches the requested suites back on.
+    DO_CORE_TESTS=0
+    DO_SQL_TESTS=0
+    DO_MLLIB_TESTS=0
+    DO_ML_TESTS=0
+    DO_STREAMING_TESTS=0
+else
+    # No suite was named (only an optional -v pair, already shifted away):
+    # every suite runs.
+    DO_CORE_TESTS=1
+    DO_SQL_TESTS=1
+    DO_MLLIB_TESTS=1
+    DO_ML_TESTS=1
+    DO_STREAMING_TESTS=1
+fi
+
+# Consume the remaining arguments one at a time, so that none are left in
+# the positional parameters afterwards.
+until [[ $# -eq 0 ]]; do
+    case "$1" in
+        core)      DO_CORE_TESTS=1 ;;
+        sql)       DO_SQL_TESTS=1 ;;
+        mllib)     DO_MLLIB_TESTS=1 ;;
+        ml)        DO_ML_TESTS=1 ;;
+        streaming) DO_STREAMING_TESTS=1 ;;
+        *)
+            # Not a known suite name: show the synopsis and give up.
+            usage
+            exit 1
+            ;;
+    esac
+    shift
+done
+
+# Set SCRIPT_DIR to the directory that this script is stored in.
+SCRIPT_DIR="$(cd "$(dirname "$0")"; pwd)"
 
 # Figure out where the Spark framework is installed
 FWDIR="$(cd "`dirname "$0"`"; cd ../; pwd)"
@@ -26,157 +85,37 @@ FWDIR="$(cd "`dirname "$0"`"; cd ../; pwd)"
 # CD into the python directory to find things on the right path
 cd "$FWDIR/python"
 
-FAILED=0
 LOG_FILE=unit-tests.log
-START=$(date +"%s")
 
 rm -f $LOG_FILE
 
-# Remove the metastore and warehouse directory created by the HiveContext tests in Spark SQL
-rm -rf metastore warehouse
-
-function run_test() {
-    echo -en "Running test: $1 ... " | tee -a $LOG_FILE
-    start=$(date +"%s")
-    SPARK_TESTING=1 time "$FWDIR"/bin/pyspark $1 > $LOG_FILE 2>&1
-
-    FAILED=$((PIPESTATUS[0]||$FAILED))
-
-    # Fail and exit on the first test failure.
-    if [[ $FAILED != 0 ]]; then
-        cat $LOG_FILE | grep -v "^[0-9][0-9]*" # filter all lines starting with a number.
-        echo -en "\033[31m"  # Red
-        echo "Had test failures; see logs."
-        echo -en "\033[0m"  # No color
-        exit -1
-    else
-        now=$(date +"%s")
-        echo "ok ($(($now - $start))s)"
-    fi
-}
-
-function run_core_tests() {
-    echo "Run core tests ..."
-    run_test "pyspark.rdd"
-    run_test "pyspark.context"
-    run_test "pyspark.conf"
-    run_test "pyspark.broadcast"
-    run_test "pyspark.accumulators"
-    run_test "pyspark.serializers"
-    run_test "pyspark.profiler"
-    run_test "pyspark.shuffle"
-    run_test "pyspark.tests"
-}
-
-function run_sql_tests() {
-    echo "Run sql tests ..."
-    run_test "pyspark.sql.types"
-    run_test "pyspark.sql.context"
-    run_test "pyspark.sql.column"
-    run_test "pyspark.sql.dataframe"
-    run_test "pyspark.sql.group"
-    run_test "pyspark.sql.functions"
-    run_test "pyspark.sql.readwriter"
-    run_test "pyspark.sql.window"
-    run_test "pyspark.sql.tests"
-}
-
-function run_mllib_tests() {
-    echo "Run mllib tests ..."
-    run_test "pyspark.mllib.classification"
-    run_test "pyspark.mllib.clustering"
-    run_test "pyspark.mllib.evaluation"
-    run_test "pyspark.mllib.feature"
-    run_test "pyspark.mllib.fpm"
-    run_test "pyspark.mllib.linalg"
-    run_test "pyspark.mllib.random"
-    run_test "pyspark.mllib.recommendation"
-    run_test "pyspark.mllib.regression"
-    run_test "pyspark.mllib.stat._statistics"
-    run_test "pyspark.mllib.stat.KernelDensity"
-    run_test "pyspark.mllib.tree"
-    run_test "pyspark.mllib.util"
-    run_test "pyspark.mllib.tests"
-}
-
-function run_ml_tests() {
-    echo "Run ml tests ..."
-    run_test "pyspark.ml.feature"
-    run_test "pyspark.ml.classification"
-    run_test "pyspark.ml.recommendation"
-    run_test "pyspark.ml.regression"
-    run_test "pyspark.ml.tuning"
-    run_test "pyspark.ml.tests"
-    run_test "pyspark.ml.evaluation"
-}
-
-function run_streaming_tests() {
-    echo "Run streaming tests ..."
-
-    KAFKA_ASSEMBLY_DIR="$FWDIR"/external/kafka-assembly
-    JAR_PATH="${KAFKA_ASSEMBLY_DIR}/target/scala-${SPARK_SCALA_VERSION}"
-    for f in "${JAR_PATH}"/spark-streaming-kafka-assembly-*.jar; do
-      if [[ ! -e "$f" ]]; then
-        echo "Failed to find Spark Streaming Kafka assembly jar in $KAFKA_ASSEMBLY_DIR" 1>&2
-        echo "You need to build Spark with " \
-             "'build/sbt assembly/assembly streaming-kafka-assembly/assembly' or" \
-             "'build/mvn package' before running this program" 1>&2
-        exit 1
-      fi
-      KAFKA_ASSEMBLY_JAR="$f"
-    done
-
-    export PYSPARK_SUBMIT_ARGS="--jars ${KAFKA_ASSEMBLY_JAR} pyspark-shell"
-    run_test "pyspark.streaming.util"
-    run_test "pyspark.streaming.tests"
-}
-
 echo "Running PySpark tests. Output is in python/$LOG_FILE."
 
-export PYSPARK_PYTHON="python"
-
-# Try to test with Python 2.6, since that's the minimum version that we support:
-if [ $(which python2.6) ]; then
-    export PYSPARK_PYTHON="python2.6"
-fi
+if [ -n "$TARGET_PYTHON_VERSION" ]; then
+    # Require a whole-word entry of the supported list, so fragments such as "2" or "py" are rejected.
+    if [[ "$TARGET_PYTHON_VERSION" == *[[:space:]]* || " $SUPPORT_PYTHON_VERSIONS " != *" $TARGET_PYTHON_VERSION "* ]]; then
+        echo "python $TARGET_PYTHON_VERSION is not supported."; exit 1
+    fi
 
-echo "Testing with Python version:"
-$PYSPARK_PYTHON --version
-
-run_core_tests
-run_sql_tests
-run_mllib_tests
-run_ml_tests
-run_streaming_tests
-
-# Try to test with Python 3
-if [ $(which python3.4) ]; then
-    export PYSPARK_PYTHON="python3.4"
-    echo "Testing with Python3.4 version:"
-    $PYSPARK_PYTHON --version
-
-    run_core_tests
-    run_sql_tests
-    run_mllib_tests
-    run_ml_tests
-    run_streaming_tests
+    SUPPORT_PYTHON_VERSIONS=$TARGET_PYTHON_VERSION
 fi
 
-# Try to test with PyPy
-if [ $(which pypy) ]; then
-    export PYSPARK_PYTHON="pypy"
-    echo "Testing with PyPy version:"
-    $PYSPARK_PYTHON --version
+for PYVERSION in $SUPPORT_PYTHON_VERSIONS;
+do
+    # Remove the metastore and warehouse directory created by the HiveContext tests in Spark SQL
+    rm -rf metastore warehouse
 
-    run_core_tests
-    run_sql_tests
-    run_streaming_tests
-fi
+    START=$(date +"%s")
+    FAILED=0
 
-if [[ $FAILED == 0 ]]; then
-    now=$(date +"%s")
-    echo -e "\033[32mTests passed \033[0min $(($now - $START)) seconds"
-fi
+    if [ ! -f "${SCRIPT_DIR}/tests/$PYVERSION.sh" ]; then
+        . "${SCRIPT_DIR}/tests/default" $PYVERSION  # no version-specific script: source the default with the version as $1
+    else
+        . "${SCRIPT_DIR}/tests/$PYVERSION.sh"       # a version-specific script exists and is sourced instead
+    fi
 
-# TODO: in the long-run, it would be nice to use a test runner like `nose`.
-# The doctest fixtures are the current barrier to doing this.
+    # Green success line with the seconds elapsed since START was taken.
+    if [[ $FAILED == 0 ]]; then
+        now=$(date +"%s"); echo -e "\033[32mTests passed \033[0min $((now - START)) seconds"
+    fi
+done
diff --git a/python/tests/common b/python/tests/common
@@ -0,0 +1,132 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+function run_test() {
+    # Run the pyspark module named by $1 and stop the whole script on failure.
+    # NOTE(review): ">" truncates $LOG_FILE, so the line tee appended is lost - confirm intended.
+    echo -en "Running test: $1 ... " | tee -a $LOG_FILE
+    start=$(date +"%s")
+    SPARK_TESTING=1 time "$FWDIR"/bin/pyspark $1 > $LOG_FILE 2>&1
+    FAILED=$(($?||$FAILED))
+    if [[ $FAILED == 0 ]]; then
+        now=$(date +"%s")
+        echo "ok ($((now - start))s)"
+        return
+    fi
+    # Dump the log without the lines that begin with a digit, then bail out.
+    grep -v "^[0-9][0-9]*" $LOG_FILE
+    echo -en "\033[31m"  # Red
+    echo "Had test failures; see logs."
+    echo -en "\033[0m"  # No color
+    exit -1
+}
+
+function run_core_tests() {
+    # Core suite. Returns 0 without running anything when DO_CORE_TESTS is 0.
+    [ $DO_CORE_TESTS == 0 ] && return 0
+
+    echo "Run core tests ..."
+    local module
+
+    # Modules run in the listed order; run_test is called once per module.
+    for module in \
+        pyspark.rdd pyspark.context pyspark.conf \
+        pyspark.broadcast pyspark.accumulators pyspark.serializers \
+        pyspark.profiler pyspark.shuffle pyspark.tests
+    do
+        run_test "$module"
+    done
+}
+
+function run_sql_tests() {
+    # SQL suite. Returns 0 without running anything when DO_SQL_TESTS is 0.
+    [ $DO_SQL_TESTS == 0 ] && return 0
+
+    echo "Run sql tests ..."
+    local module
+
+    # Modules run in the listed order; run_test is called once per module.
+    for module in \
+        pyspark.sql.types pyspark.sql.context pyspark.sql.column \
+        pyspark.sql.dataframe pyspark.sql.group pyspark.sql.functions \
+        pyspark.sql.readwriter pyspark.sql.window pyspark.sql.tests
+    do
+        run_test "$module"
+    done
+}
+
+function run_mllib_tests() {
+    # MLlib suite. Returns 0 without running anything when DO_MLLIB_TESTS is 0.
+    [ $DO_MLLIB_TESTS == 0 ] && return 0
+
+    echo "Run mllib tests ..."
+    local module
+
+    # Modules run in the listed order; run_test is called once per module
+    # and ends the script itself if a module fails.
+    for module in \
+        pyspark.mllib.classification pyspark.mllib.clustering \
+        pyspark.mllib.evaluation pyspark.mllib.feature \
+        pyspark.mllib.fpm pyspark.mllib.linalg \
+        pyspark.mllib.random pyspark.mllib.recommendation \
+        pyspark.mllib.regression pyspark.mllib.stat._statistics \
+        pyspark.mllib.stat.KernelDensity pyspark.mllib.tree \
+        pyspark.mllib.util pyspark.mllib.tests
+    do
+        run_test "$module"
+    done
+}
+
+function run_ml_tests() {
+    # ML suite. Returns 0 without running anything when DO_ML_TESTS is 0.
+    [ $DO_ML_TESTS == 0 ] && return 0
+
+    echo "Run ml tests ..."
+    local module
+    for module in \
+        pyspark.ml.feature pyspark.ml.classification \
+        pyspark.ml.recommendation pyspark.ml.regression \
+        pyspark.ml.tuning pyspark.ml.tests pyspark.ml.evaluation
+    do
+        run_test "$module"
+    done
+}
+
+function run_streaming_tests() {
+    # Streaming suite. Returns 0 without running anything when DO_STREAMING_TESTS is 0.
+    [ $DO_STREAMING_TESTS == 0 ] && return 0
+
+    echo "Run streaming tests ..."
+    KAFKA_ASSEMBLY_DIR="$FWDIR"/external/kafka-assembly
+    JAR_PATH="${KAFKA_ASSEMBLY_DIR}/target/scala-${SPARK_SCALA_VERSION}"
+    # An unmatched glob stays literal (unless nullglob is set) and names no file, which is
+    # how a missing jar is detected; with several matches the last one is kept.
+    for f in "${JAR_PATH}"/spark-streaming-kafka-assembly-*.jar; do
+      [[ -e "$f" ]] || {
+        echo "Failed to find Spark Streaming Kafka assembly jar in $KAFKA_ASSEMBLY_DIR" 1>&2
+        echo "You need to build Spark with " \
+             "'build/sbt assembly/assembly streaming-kafka-assembly/assembly' or" \
+             "'build/mvn package' before running this program" 1>&2
+        exit 1
+      }
+      KAFKA_ASSEMBLY_JAR="$f"
+    done
+
+    # The jar is exported through PYSPARK_SUBMIT_ARGS before the two modules run.
+    export PYSPARK_SUBMIT_ARGS="--jars ${KAFKA_ASSEMBLY_JAR} pyspark-shell"
+    run_test "pyspark.streaming.util"
+    run_test "pyspark.streaming.tests"
+}