Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions python/pyspark/sql/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -4680,6 +4680,26 @@ def test_supported_types(self):
self.assertPandasEqual(expected2, result2)
self.assertPandasEqual(expected3, result3)

def test_array_type_correct(self):
    """A GROUPED_MAP pandas_udf must round-trip an ArrayType column intact.

    Regression test: the data is repartitioned into a single partition so
    the same Arrow writer is reused for every group, which is the scenario
    where a stale ``ListVector`` previously corrupted array columns.
    """
    from pyspark.sql.functions import pandas_udf, PandasUDFType, array, col

    # One partition keyed by `id` forces writer reuse across groups.
    df = self.data.withColumn("arr", array(col("id"))).repartition(1, "id")

    fields = [
        StructField('id', LongType()),
        StructField('v', IntegerType()),
        StructField('arr', ArrayType(LongType())),
    ]
    output_schema = StructType(fields)

    # Identity UDF: any mismatch can only come from (de)serialization.
    identity_udf = pandas_udf(
        lambda pdf: pdf,
        output_schema,
        PandasUDFType.GROUPED_MAP
    )

    result = df.groupby('id').apply(identity_udf).sort('id').toPandas()
    expected = df.toPandas().groupby('id').apply(identity_udf.func).reset_index(drop=True)
    self.assertPandasEqual(expected, result)

def test_register_grouped_map_udf(self):
from pyspark.sql.functions import pandas_udf, PandasUDFType

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,14 @@ private[arrow] abstract class ArrowFieldWriter {
valueVector match {
case fixedWidthVector: BaseFixedWidthVector => fixedWidthVector.reset()
case variableWidthVector: BaseVariableWidthVector => variableWidthVector.reset()
case listVector: ListVector =>
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

good catch! So this bug has been there since day 1, when the arrow writer was first added, right?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, I think so.

// Manually reset the underlying buffers.
// TODO: When we upgrade to Arrow 0.10.0, we can simply remove this and call
// `listVector.reset()`.
val buffers = listVector.getBuffers(false)
buffers.foreach(buf => buf.setZero(0, buf.capacity()))
listVector.setValueCount(0)
listVector.setLastSet(0)
case _ =>
}
count = 0
Expand Down