Skip to content
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Address comments and see if it works
  • Loading branch information
HyukjinKwon committed Apr 22, 2018
commit 56b9001362ce18908689d679a8fdb4e554460cd0
12 changes: 9 additions & 3 deletions python/pyspark/ml/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -2136,17 +2136,23 @@ class ImageReaderTest2(PySparkTestCase):
@classmethod
def setUpClass(cls):
super(ImageReaderTest2, cls).setUpClass()
cls.hive_available = True
# Note that here we enable Hive's support.
cls.spark = None
try:
cls.sc._jvm.org.apache.hadoop.hive.conf.HiveConf()
except py4j.protocol.Py4JError:
cls.tearDownClass()
raise unittest.SkipTest("Hive is not available")
cls.hive_available = False
except TypeError:
cls.tearDownClass()
raise unittest.SkipTest("Hive is not available")
cls.spark = HiveContext._createForTesting(cls.sc)
cls.hive_available = False
if cls.hive_available:
cls.spark = HiveContext._createForTesting(cls.sc)

def setUp(self):
    """Per-test guard: skip when Hive support could not be initialized in setUpClass."""
    if self.hive_available:
        return
    self.skipTest("Hive is not available.")
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Finished test(python3): pyspark.sql.tests (51s) ... 93 tests were skipped

...

Skipped tests in pyspark.sql.tests with python3:
    test_createDataFrame_column_name_encoding (pyspark.sql.tests.ArrowTests) ... skipped 'PyArrow >= 0.8.0 must be installed; however, it was not found.'
    ...
    test_collect_functions (pyspark.sql.tests.HiveContextSQLTests) ... skipped 'Hive is not available.'
    test_datetime_functions (pyspark.sql.tests.HiveContextSQLTests) ... skipped 'Hive is not available.'
    ...
    test_query_execution_listener_on_collect (pyspark.sql.tests.QueryExecutionListenerTests) ... skipped "'org.apache.spark.sql.TestQueryExecutionListener' is not available. Will skip the related tests."
    ...

@viirya, @bersprockets and @BryanCutler, these were the output from my partial testing in my local.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am a little worried that the output could become too verbose when many tests are skipped. See #21107 (comment).

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I assume there is no way to only print out the skipped test class name?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No clean way as far as I can tell. I could add another regex-based step, but I would like to avoid that approach as much as possible.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Gotcha. Yeah the current implementation looks good to me.


@classmethod
def tearDownClass(cls):
Expand Down
51 changes: 30 additions & 21 deletions python/pyspark/sql/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -3079,6 +3079,12 @@ class QueryExecutionListenerTests(unittest.TestCase, SQLTestUtils):
# These tests are separate because it uses 'spark.sql.queryExecutionListeners' which is
# static and immutable. This can't be set or unset, for example, via `spark.conf`.

def setUp(self):
    """Skip each test when the JVM-side test listener class is unavailable.

    ``has_listener`` is set by ``setUpClass`` depending on whether the
    compiled ``TestQueryExecutionListener`` class file was found.
    """
    if not self.has_listener:
        # skipTest() raises unittest.SkipTest itself, so no explicit
        # `raise` is needed; the previous `raise self.skipTest(...)` was
        # dead code (skipTest never returns on this path).
        self.skipTest(
            "'org.apache.spark.sql.TestQueryExecutionListener' is not "
            "available. Will skip the related tests.")

@classmethod
def setUpClass(cls):
import glob
Expand All @@ -3088,23 +3094,22 @@ def setUpClass(cls):
filename_pattern = (
"sql/core/target/scala-*/test-classes/org/apache/spark/sql/"
"TestQueryExecutionListener.class")
if not glob.glob(os.path.join(SPARK_HOME, filename_pattern)):
raise unittest.SkipTest(
"'org.apache.spark.sql.TestQueryExecutionListener' is not "
"available. Will skip the related tests.")

# Note that 'spark.sql.queryExecutionListeners' is a static immutable configuration.
cls.spark = SparkSession.builder \
.master("local[4]") \
.appName(cls.__name__) \
.config(
"spark.sql.queryExecutionListeners",
"org.apache.spark.sql.TestQueryExecutionListener") \
.getOrCreate()
cls.has_listener = bool(glob.glob(os.path.join(SPARK_HOME, filename_pattern)))

if cls.has_listener:
# Note that 'spark.sql.queryExecutionListeners' is a static immutable configuration.
cls.spark = SparkSession.builder \
.master("local[4]") \
.appName(cls.__name__) \
.config(
"spark.sql.queryExecutionListeners",
"org.apache.spark.sql.TestQueryExecutionListener") \
.getOrCreate()

@classmethod
def tearDownClass(cls):
    """Stop the SparkSession if setUpClass managed to create one.

    ``cls.spark`` is only assigned when the test listener class was
    available, so the attribute may legitimately be absent (EAFP).
    """
    try:
        session = cls.spark
    except AttributeError:
        return
    session.stop()

def tearDown(self):
    """Reset the JVM-side OnSuccessCall listener state after each test."""
    jvm = self.spark._jvm
    jvm.OnSuccessCall.clear()
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is not called if the test is skipped during setUp right?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yup.

Expand Down Expand Up @@ -3188,18 +3193,22 @@ class HiveContextSQLTests(ReusedPySparkTestCase):
def setUpClass(cls):
ReusedPySparkTestCase.setUpClass()
cls.tempdir = tempfile.NamedTemporaryFile(delete=False)
cls.hive_available = True
try:
cls.sc._jvm.org.apache.hadoop.hive.conf.HiveConf()
except py4j.protocol.Py4JError:
cls.tearDownClass()
raise unittest.SkipTest("Hive is not available")
cls.hive_available = False
except TypeError:
cls.tearDownClass()
raise unittest.SkipTest("Hive is not available")
cls.hive_available = False
os.unlink(cls.tempdir.name)
cls.spark = HiveContext._createForTesting(cls.sc)
cls.testData = [Row(key=i, value=str(i)) for i in range(100)]
cls.df = cls.sc.parallelize(cls.testData).toDF()
if cls.hive_available:
cls.spark = HiveContext._createForTesting(cls.sc)
cls.testData = [Row(key=i, value=str(i)) for i in range(100)]
cls.df = cls.sc.parallelize(cls.testData).toDF()

def setUp(self):
    # Every test in this class needs a working HiveContext; bail out
    # (as a skip, not a failure) when setUpClass found Hive missing.
    hive_ok = self.hive_available
    if not hive_ok:
        self.skipTest("Hive is not available.")

@classmethod
def tearDownClass(cls):
Expand Down
12 changes: 10 additions & 2 deletions python/run-tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@ def run_individual_python_test(test_name, pyspark_python):
# this code is invoked from a thread other than the main thread.
os._exit(-1)
else:
skipped_counts = 0
try:
per_test_output.seek(0)
# Here expects skipped test output from unittest when verbosity level is
Expand All @@ -119,7 +120,8 @@ def run_individual_python_test(test_name, pyspark_python):
skipped_tests = list(filter(
lambda line: re.search('test_.* \(pyspark\..*\) ... skipped ', line),
decoded_lines))
if len(skipped_tests) > 0:
skipped_counts = len(skipped_tests)
if skipped_counts > 0:
key = (pyspark_python, test_name)
SKIPPED_TESTS[key] = skipped_tests
per_test_output.close()
Expand All @@ -130,7 +132,13 @@ def run_individual_python_test(test_name, pyspark_python):
# Here, we use os._exit() instead of sys.exit() in order to force Python to exit even if
# this code is invoked from a thread other than the main thread.
os._exit(-1)
LOGGER.info("Finished test(%s): %s (%is)", pyspark_python, test_name, duration)
if skipped_counts != 0:
LOGGER.info(
"Finished test(%s): %s (%is) ... %s tests were skipped", pyspark_python, test_name,
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure if there's a better format. let me know.

duration, skipped_counts)
else:
LOGGER.info(
"Finished test(%s): %s (%is)", pyspark_python, test_name, duration)


def get_default_python_executables():
Expand Down