Skip to content
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions python/pyspark/sql/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -2013,13 +2013,13 @@ def toPandas(self):
warnings.warn(msg)
use_arrow = False
else:
msg = (
e.message = (
"toPandas attempted Arrow optimization because "
"'spark.sql.execution.arrow.enabled' is set to true; however, "
"failed by the reason below:\n %s\n"
"For fallback to non-optimization automatically, please set true to "
"'spark.sql.execution.arrow.fallback.enabled'." % _exception_message(e))
raise RuntimeError(msg)
raise

# Try to use Arrow optimization when the schema is supported and the required version
# of PyArrow is found, if 'spark.sql.execution.arrow.enabled' is enabled.
Expand All @@ -2040,14 +2040,14 @@ def toPandas(self):
except Exception as e:
# We might have to allow fallback here as well but multiple Spark jobs can
# be executed. So, simply fail in this case for now.
msg = (
e.message = (
"toPandas attempted Arrow optimization because "
"'spark.sql.execution.arrow.enabled' is set to true; however, "
"failed unexpectedly:\n %s\n"
"Note that 'spark.sql.execution.arrow.fallback.enabled' does "
"not have an effect in such failure in the middle of "
"computation." % _exception_message(e))
raise RuntimeError(msg)
raise

# Below is toPandas without Arrow optimization.
pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns)
Expand Down
4 changes: 2 additions & 2 deletions python/pyspark/sql/session.py
Original file line number Diff line number Diff line change
Expand Up @@ -679,13 +679,13 @@ def createDataFrame(self, data, schema=None, samplingRatio=None, verifySchema=Tr
"true." % _exception_message(e))
warnings.warn(msg)
else:
msg = (
e.message = (
Copy link
Member

@HyukjinKwon HyukjinKwon Mar 16, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@BryanCutler, I think the `message` attribute exists only in Python 2. Also, are you absolutely sure that the wrapped exception message shows up in the console too?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah yes, you're right. The tests check the exception message so I thought all was good. Let me try something else.

"createDataFrame attempted Arrow optimization because "
"'spark.sql.execution.arrow.enabled' is set to true; however, "
"failed by the reason below:\n %s\n"
"For fallback to non-optimization automatically, please set true to "
"'spark.sql.execution.arrow.fallback.enabled'." % _exception_message(e))
raise RuntimeError(msg)
raise
data = self._convert_from_pandas(data, schema, timezone)

if isinstance(schema, StructType):
Expand Down
6 changes: 3 additions & 3 deletions python/pyspark/sql/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -3661,7 +3661,7 @@ def test_createDataFrame_with_incorrect_schema(self):
pdf = self.create_pandas_data_frame()
wrong_schema = StructType(list(reversed(self.schema)))
with QuietTest(self.sc):
with self.assertRaisesRegexp(RuntimeError, ".*No cast.*string.*timestamp.*"):
with self.assertRaisesRegexp(Exception, ".*No cast.*string.*timestamp.*"):
self.spark.createDataFrame(pdf, schema=wrong_schema)

def test_createDataFrame_with_names(self):
Expand All @@ -3686,7 +3686,7 @@ def test_createDataFrame_column_name_encoding(self):
def test_createDataFrame_with_single_data_type(self):
import pandas as pd
with QuietTest(self.sc):
with self.assertRaisesRegexp(RuntimeError, ".*IntegerType.*not supported.*"):
with self.assertRaisesRegexp(ValueError, ".*IntegerType.*not supported.*"):
self.spark.createDataFrame(pd.DataFrame({"a": [1]}), schema="int")

def test_createDataFrame_does_not_modify_input(self):
Expand Down Expand Up @@ -3761,7 +3761,7 @@ def test_createDataFrame_fallback_disabled(self):
import pandas as pd

with QuietTest(self.sc):
with self.assertRaisesRegexp(Exception, 'Unsupported type'):
with self.assertRaisesRegexp(TypeError, 'Unsupported type'):
self.spark.createDataFrame(
pd.DataFrame([[{u'a': 1}]]), "a: map<string, int>")

Expand Down