Raise a new exception of the same type with the modified message instead of assigning to e.message

apache · BryanCutler · Mar 15, 2018 · Mar 16, 2018 · Mar 20, 2018 · Mar 21, 2018
commit dc5ed47ec36433850d6c58890d19b4173eda9a06
diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py
@@ -2013,13 +2013,14 @@ def toPandas(self):
                     warnings.warn(msg)
                     use_arrow = False
                 else:
-                    e.message = (
+                    clazz = type(e)
+                    msg = (
                         "toPandas attempted Arrow optimization because "
                         "'spark.sql.execution.arrow.enabled' is set to true; however, "
                         "failed by the reason below:\n  %s\n"
                         "For fallback to non-optimization automatically, please set true to "
                         "'spark.sql.execution.arrow.fallback.enabled'." % _exception_message(e))
-                    raise
+                    raise clazz(msg)
 
             # Try to use Arrow optimization when the schema is supported and the required version
             # of PyArrow is found, if 'spark.sql.execution.arrow.enabled' is enabled.
@@ -2040,14 +2041,15 @@ def toPandas(self):
                 except Exception as e:
                     # We might have to allow fallback here as well but multiple Spark jobs can
                     # be executed. So, simply fail in this case for now.
-                    e.message = (
+                    clazz = type(e)
+                    msg = (
                         "toPandas attempted Arrow optimization because "
                         "'spark.sql.execution.arrow.enabled' is set to true; however, "
                         "failed unexpectedly:\n  %s\n"
                         "Note that 'spark.sql.execution.arrow.fallback.enabled' does "
                         "not have an effect in such failure in the middle of "
                         "computation." % _exception_message(e))
-                    raise
+                    raise clazz(msg)
 
         # Below is toPandas without Arrow optimization.
         pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns)

diff --git a/python/pyspark/sql/session.py b/python/pyspark/sql/session.py
@@ -679,13 +679,14 @@ def createDataFrame(self, data, schema=None, samplingRatio=None, verifySchema=Tr
                             "true." % _exception_message(e))
                         warnings.warn(msg)
                     else:
-                        e.message = (
+                        clazz = type(e)
+                        msg = (
                             "createDataFrame attempted Arrow optimization because "
                             "'spark.sql.execution.arrow.enabled' is set to true; however, "
                             "failed by the reason below:\n  %s\n"
                             "For fallback to non-optimization automatically, please set true to "
                             "'spark.sql.execution.arrow.fallback.enabled'." % _exception_message(e))
-                        raise
+                        raise clazz(msg)
             data = self._convert_from_pandas(data, schema, timezone)
 
         if isinstance(schema, StructType):