Skip to content
Prev Previous commit
defaults to conf
  • Loading branch information
xinrong-meng committed Nov 14, 2024
commit 3f49f193190fb2d018b7cd10ee5dd2696d81c287
2 changes: 1 addition & 1 deletion python/pyspark/sql/pandas/conversion.py
Original file line number Diff line number Diff line change
Expand Up @@ -664,7 +664,7 @@ def _create_from_pandas_with_arrow(

if verifySchema is _NoValue:
# (With Arrow optimization) createDataFrame with `pandas.DataFrame`
verifySchema = False
verifySchema = self._jconf.arrowSafeTypeConversion()

infer_pandas_dict_as_map = (
str(self.conf.get("spark.sql.execution.pandas.inferPandasDictAsMap")).lower() == "true"
Expand Down
3 changes: 2 additions & 1 deletion python/pyspark/sql/session.py
Original file line number Diff line number Diff line change
Expand Up @@ -1378,7 +1378,8 @@ def createDataFrame( # type: ignore[misc]
verify data types of every row against schema.
If not provided, createDataFrame with
- pyarrow.Table, verifySchema=False
- pandas.DataFrame with Arrow optimization, verifySchema=False
- pandas.DataFrame with Arrow optimization, verifySchema defaults to
the value of `spark.sql.execution.pandas.convertToArrowArraySafely`
- pandas.DataFrame without Arrow optimization, verifySchema=True
- regular Python instances, verifySchema=True
Arrow optimization is enabled/disabled via `spark.sql.execution.arrow.pyspark.enabled`.
Expand Down
7 changes: 6 additions & 1 deletion python/pyspark/sql/tests/test_arrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -553,8 +553,13 @@ def test_createDataFrame_verifySchema(self):

# pandas DataFrame with Arrow optimization
pdf = pd.DataFrame(data)
df = self.spark.createDataFrame(pdf, schema=schema) # verifySchema defaults to False
df = self.spark.createDataFrame(pdf, schema=schema)
# verifySchema defaults to the value of
# `spark.sql.execution.pandas.convertToArrowArraySafely`, which is false by default
self.assertEqual(df.collect(), expected)
with self.assertRaises(Exception):
with self.sql_conf({"spark.sql.execution.pandas.convertToArrowArraySafely": True}):
df = self.spark.createDataFrame(pdf, schema=schema)
with self.assertRaises(Exception):
df = self.spark.createDataFrame(pdf, schema=schema, verifySchema=True)

Expand Down