Skip to content
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
fix; test
  • Loading branch information
xinrong-meng committed Nov 15, 2024
commit d524d1f2bef2710ab7d32f5959d202a6aa9efaef
10 changes: 6 additions & 4 deletions python/pyspark/sql/connect/session.py
Original file line number Diff line number Diff line change
Expand Up @@ -655,13 +655,15 @@ def createDataFrame(
_table = pa.Table.from_arrays(
[pa.array(data[::, i]) for i in range(0, data.shape[1])], _cols
)
- _table.cast()

# The _table should already have the proper column names.
_cols = None

- if verifySchema is _NoValue:
- verifySchema = True
+ if verifySchema is not _NoValue:
+ warnings.warn(
+ "'verifySchema' is ignored. It is not supported"
+ " with np.ndarray input on Spark Connect."
+ )

else:
_data = list(data)
Expand Down Expand Up @@ -702,7 +704,7 @@ def createDataFrame(
# Spark Connect will try its best to build the Arrow table with the
# inferred schema in the client side, and then rename the columns and
# cast the datatypes in the server side.
- _table = LocalDataToArrowConversion.convert(_data, _schema, verifySchema)
+ _table = LocalDataToArrowConversion.convert(_data, _schema, cast(bool, verifySchema))

# TODO: Beside the validation on number of columns, we should also check
# whether the Arrow Schema is compatible with the user provided Schema.
Expand Down
1 change: 0 additions & 1 deletion python/pyspark/sql/tests/connect/test_parity_arrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,6 @@ def test_toPandas_udt(self):
def test_create_dataframe_namedtuples(self):
self.check_create_dataframe_namedtuples(True)

- @unittest.skip("Spark Connect does not support verifySchema.")
def test_createDataFrame_verifySchema(self):
super().test_createDataFrame_verifySchema()

Expand Down