Skip to content
Prev Previous commit
defaults to conf
  • Loading branch information
xinrong-meng committed Nov 14, 2024
commit 3f49f193190fb2d018b7cd10ee5dd2696d81c287
2 changes: 1 addition & 1 deletion python/pyspark/sql/pandas/conversion.py
Original file line number Diff line number Diff line change
Expand Up @@ -664,7 +664,7 @@ def _create_from_pandas_with_arrow(

if verifySchema is _NoValue:
# (With Arrow optimization) createDataFrame with `pandas.DataFrame`
verifySchema = False
verifySchema = self._jconf.arrowSafeTypeConversion()

infer_pandas_dict_as_map = (
str(self.conf.get("spark.sql.execution.pandas.inferPandasDictAsMap")).lower() == "true"
Expand Down
3 changes: 2 additions & 1 deletion python/pyspark/sql/session.py
Original file line number Diff line number Diff line change
Expand Up @@ -1378,7 +1378,8 @@ def createDataFrame( # type: ignore[misc]
verify data types of every row against schema.
If not provided, createDataFrame with
- pyarrow.Table, verifySchema=False
- pandas.DataFrame with Arrow optimization, verifySchema=False
- pandas.DataFrame with Arrow optimization, verifySchema defaults to
the value of `spark.sql.execution.pandas.convertToArrowArraySafely`
- pandas.DataFrame without Arrow optimization, verifySchema=True
- regular Python instances, verifySchema=True
Arrow optimization is enabled/disabled via `spark.sql.execution.arrow.pyspark.enabled`.
Expand Down
7 changes: 6 additions & 1 deletion python/pyspark/sql/tests/test_arrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -553,8 +553,13 @@ def test_createDataFrame_verifySchema(self):

# pandas DataFrame with Arrow optimization
pdf = pd.DataFrame(data)
df = self.spark.createDataFrame(pdf, schema=schema) # verifySchema defaults to False
df = self.spark.createDataFrame(pdf, schema=schema)
# verifySchema defaults to the value of
# `spark.sql.execution.pandas.convertToArrowArraySafely`, which is false by default
self.assertEqual(df.collect(), expected)
with self.assertRaises(Exception):
with self.sql_conf({"spark.sql.execution.pandas.convertToArrowArraySafely": True}):
df = self.spark.createDataFrame(pdf, schema=schema)
with self.assertRaises(Exception):
df = self.spark.createDataFrame(pdf, schema=schema, verifySchema=True)

Expand Down