Skip to content

Commit 4309d46

Browse files
committed
Use arrow_column.flatten().
1 parent f8b3404 commit 4309d46

File tree

3 files changed

+6
-11
lines changed

3 files changed

+6
-11
lines changed

python/pyspark/serializers.py

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -390,13 +390,8 @@ def arrow_to_pandas(self, arrow_column, data_type):
390390

391391
if self._df_for_struct and type(data_type) == StructType:
392392
import pandas as pd
393-
import pyarrow as pa
394-
column_arrays = zip(*[[chunk.field(i)
395-
for i in range(chunk.type.num_children)]
396-
for chunk in arrow_column.data.iterchunks()])
397-
series = [_arrow_column_to_pandas(pa.column(field.name, pa.chunked_array(arrays)),
398-
field.dataType)
399-
for arrays, field in zip(column_arrays, data_type)]
393+
series = [_arrow_column_to_pandas(column, field.dataType).rename(field.name)
394+
for column, field in zip(arrow_column.flatten(), data_type)]
400395
s = _check_dataframe_localize_timestamps(pd.concat(series, axis=1), self._timezone)
401396
else:
402397
s = super(ArrowStreamPandasUDFSerializer, self).arrow_to_pandas(arrow_column, data_type)

python/pyspark/sql/tests/test_pandas_udf_scalar.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -294,7 +294,7 @@ def func(id):
294294

295295
struct_f = pandas_udf(lambda x: x, return_type)
296296
actual = df.select(struct_f(struct(col('id'), col('id').cast('string').alias('str'))))
297-
if LooseVersion(pa.__version__) < LooseVersion("0.11.0"):
297+
if LooseVersion(pa.__version__) < LooseVersion("0.10.0"):
298298
with QuietTest(self.sc):
299299
from py4j.protocol import Py4JJavaError
300300
with self.assertRaisesRegexp(

python/pyspark/sql/types.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1675,10 +1675,10 @@ def from_arrow_type(at):
16751675
raise TypeError("Unsupported type in conversion from Arrow: " + str(at))
16761676
spark_type = ArrayType(from_arrow_type(at.value_type))
16771677
elif types.is_struct(at):
1678-
# TODO: remove version check once minimum pyarrow version is 0.11.0
1679-
if LooseVersion(pa.__version__) < LooseVersion("0.11.0"):
1678+
# TODO: remove version check once minimum pyarrow version is 0.10.0
1679+
if LooseVersion(pa.__version__) < LooseVersion("0.10.0"):
16801680
raise TypeError("Unsupported type in conversion from Arrow: " + str(at) +
1681-
"\nPlease install pyarrow >= 0.11.0 for StructType support.")
1681+
"\nPlease install pyarrow >= 0.10.0 for StructType support.")
16821682
if any(types.is_struct(field.type) for field in at):
16831683
raise TypeError("Nested StructType not supported in conversion from Arrow: " + str(at))
16841684
return StructType(

0 commit comments

Comments
 (0)