Skip to content
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Address comments and see if it works
  • Loading branch information
HyukjinKwon committed Apr 22, 2018
commit 56b9001362ce18908689d679a8fdb4e554460cd0
12 changes: 9 additions & 3 deletions python/pyspark/ml/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -2136,17 +2136,23 @@ class ImageReaderTest2(PySparkTestCase):
@classmethod
def setUpClass(cls):
super(ImageReaderTest2, cls).setUpClass()
cls.hive_available = True
# Note that here we enable Hive's support.
cls.spark = None
try:
cls.sc._jvm.org.apache.hadoop.hive.conf.HiveConf()
except py4j.protocol.Py4JError:
cls.tearDownClass()
raise unittest.SkipTest("Hive is not available")
cls.hive_available = False
except TypeError:
cls.tearDownClass()
raise unittest.SkipTest("Hive is not available")
cls.spark = HiveContext._createForTesting(cls.sc)
cls.hive_available = False
if cls.hive_available:
cls.spark = HiveContext._createForTesting(cls.sc)

def setUp(self):
    """Per-test guard: skip when Hive support could not be initialized in setUpClass."""
    if self.hive_available:
        return
    self.skipTest("Hive is not available.")
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Finished test(python3): pyspark.sql.tests (51s) ... 93 tests were skipped

...

Skipped tests in pyspark.sql.tests with python3:
    test_createDataFrame_column_name_encoding (pyspark.sql.tests.ArrowTests) ... skipped 'PyArrow >= 0.8.0 must be installed; however, it was not found.'
    ...
    test_collect_functions (pyspark.sql.tests.HiveContextSQLTests) ... skipped 'Hive is not available.'
    test_datetime_functions (pyspark.sql.tests.HiveContextSQLTests) ... skipped 'Hive is not available.'
    ...
    test_query_execution_listener_on_collect (pyspark.sql.tests.QueryExecutionListenerTests) ... skipped "'org.apache.spark.sql.TestQueryExecutionListener' is not available. Will skip the related tests."
    ...

@viirya, @bersprockets and @BryanCutler, these were the output from my partial testing in my local.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am a little worried that the output could become too verbose when many tests are skipped. See #21107 (comment).

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I assume there is no way to only print out the skipped test class name?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No clean way as far as I can tell. I could add another regex-based step, but I would like to avoid that approach as much as possible.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Gotcha. Yeah the current implementation looks good to me.


@classmethod
def tearDownClass(cls):
Expand Down
51 changes: 30 additions & 21 deletions python/pyspark/sql/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -3079,6 +3079,12 @@ class QueryExecutionListenerTests(unittest.TestCase, SQLTestUtils):
# These tests are separate because it uses 'spark.sql.queryExecutionListeners' which is
# static and immutable. This can't be set or unset, for example, via `spark.conf`.

def setUp(self):
    """Skip each test when the JVM-side test listener class is unavailable.

    ``has_listener`` is set by ``setUpClass`` depending on whether the
    compiled ``TestQueryExecutionListener`` class file was found.
    """
    if not self.has_listener:
        # skipTest() raises unittest.SkipTest itself, so no explicit
        # `raise` is needed; the previous `raise self.skipTest(...)` was
        # dead code (skipTest never returns on this path).
        self.skipTest(
            "'org.apache.spark.sql.TestQueryExecutionListener' is not "
            "available. Will skip the related tests.")

@classmethod
def setUpClass(cls):
import glob
Expand All @@ -3088,23 +3094,22 @@ def setUpClass(cls):
filename_pattern = (
"sql/core/target/scala-*/test-classes/org/apache/spark/sql/"
"TestQueryExecutionListener.class")
if not glob.glob(os.path.join(SPARK_HOME, filename_pattern)):
raise unittest.SkipTest(
"'org.apache.spark.sql.TestQueryExecutionListener' is not "
"available. Will skip the related tests.")

# Note that 'spark.sql.queryExecutionListeners' is a static immutable configuration.
cls.spark = SparkSession.builder \
.master("local[4]") \
.appName(cls.__name__) \
.config(
"spark.sql.queryExecutionListeners",
"org.apache.spark.sql.TestQueryExecutionListener") \
.getOrCreate()
cls.has_listener = bool(glob.glob(os.path.join(SPARK_HOME, filename_pattern)))

if cls.has_listener:
# Note that 'spark.sql.queryExecutionListeners' is a static immutable configuration.
cls.spark = SparkSession.builder \
.master("local[4]") \
.appName(cls.__name__) \
.config(
"spark.sql.queryExecutionListeners",
"org.apache.spark.sql.TestQueryExecutionListener") \
.getOrCreate()

@classmethod
def tearDownClass(cls):
    """Stop the SparkSession if setUpClass managed to create one.

    ``cls.spark`` is only assigned when the test listener class was
    available, so the attribute may legitimately be absent (EAFP).
    """
    try:
        session = cls.spark
    except AttributeError:
        return
    session.stop()

def tearDown(self):
    """Reset the JVM-side OnSuccessCall listener state after each test."""
    jvm = self.spark._jvm
    jvm.OnSuccessCall.clear()
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is not called if the test is skipped during setUp right?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yup.

Expand Down Expand Up @@ -3188,18 +3193,22 @@ class HiveContextSQLTests(ReusedPySparkTestCase):
def setUpClass(cls):
ReusedPySparkTestCase.setUpClass()
cls.tempdir = tempfile.NamedTemporaryFile(delete=False)
cls.hive_available = True
try:
cls.sc._jvm.org.apache.hadoop.hive.conf.HiveConf()
except py4j.protocol.Py4JError:
cls.tearDownClass()
raise unittest.SkipTest("Hive is not available")
cls.hive_available = False
except TypeError:
cls.tearDownClass()
raise unittest.SkipTest("Hive is not available")
cls.hive_available = False
os.unlink(cls.tempdir.name)
cls.spark = HiveContext._createForTesting(cls.sc)
cls.testData = [Row(key=i, value=str(i)) for i in range(100)]
cls.df = cls.sc.parallelize(cls.testData).toDF()
if cls.hive_available:
cls.spark = HiveContext._createForTesting(cls.sc)
cls.testData = [Row(key=i, value=str(i)) for i in range(100)]
cls.df = cls.sc.parallelize(cls.testData).toDF()

def setUp(self):
    # Every test in this class needs a working HiveContext; bail out
    # (as a skip, not a failure) when setUpClass found Hive missing.
    hive_ok = self.hive_available
    if not hive_ok:
        self.skipTest("Hive is not available.")

@classmethod
def tearDownClass(cls):
Expand Down
12 changes: 10 additions & 2 deletions python/run-tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@ def run_individual_python_test(test_name, pyspark_python):
# this code is invoked from a thread other than the main thread.
os._exit(-1)
else:
skipped_counts = 0
try:
per_test_output.seek(0)
# Here expects skipped test output from unittest when verbosity level is
Expand All @@ -119,7 +120,8 @@ def run_individual_python_test(test_name, pyspark_python):
skipped_tests = list(filter(
lambda line: re.search('test_.* \(pyspark\..*\) ... skipped ', line),
decoded_lines))
if len(skipped_tests) > 0:
skipped_counts = len(skipped_tests)
if skipped_counts > 0:
key = (pyspark_python, test_name)
SKIPPED_TESTS[key] = skipped_tests
per_test_output.close()
Expand All @@ -130,7 +132,13 @@ def run_individual_python_test(test_name, pyspark_python):
# Here, we use os._exit() instead of sys.exit() in order to force Python to exit even if
# this code is invoked from a thread other than the main thread.
os._exit(-1)
LOGGER.info("Finished test(%s): %s (%is)", pyspark_python, test_name, duration)
if skipped_counts != 0:
LOGGER.info(
"Finished test(%s): %s (%is) ... %s tests were skipped", pyspark_python, test_name,
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure if there's a better format. let me know.

duration, skipped_counts)
else:
LOGGER.info(
"Finished test(%s): %s (%is)", pyspark_python, test_name, duration)


def get_default_python_executables():
Expand Down