collectPartitions()
davies committed Aug 5, 2014
commit 4ffae0031e1f00641845fc5e9e3b62f54e7c56ad
10 changes: 8 additions & 2 deletions python/pyspark/rdd.py
@@ -737,10 +737,16 @@ def _collect_iterator_through_file(self, iterator):

def collectPartitions(self, partitions):
Contributor

In the Scala API, this is marked as a private API used only for tests. Is there a non-test use case for this?

Contributor Author

It will help with debugging; you can collect parts of the RDD and investigate them.

It would also be helpful to have an API like slice(start, [end]) to select a subset of the partitions. DPark has this kind of API; it helps us a lot by narrowing the data down for fast debugging.
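To make the idea concrete, here is a minimal sketch of what such a slice-based helper could look like on top of the collectPartitions() added in this diff; slice_partitions is a hypothetical name used only for illustration, not an existing PySpark API:

```python
def slice_partitions(rdd, start, end=None):
    """Collect only partitions [start, end) of `rdd` for quick inspection.

    A debugging-helper sketch: it assumes rdd.collectPartitions() from this
    diff and rdd.getNumPartitions(); the name and signature are hypothetical.
    """
    if end is None:
        end = start + 1                      # slice(start) means a single partition
    end = min(end, rdd.getNumPartitions())   # clamp to the number of partitions
    return rdd.collectPartitions(range(start, end))

# Usage, assuming a SparkContext `sc` as in the doctest below:
# rdd = sc.parallelize(range(8), 4)
# slice_partitions(rdd, 1, 3)   # -> [[2, 3], [4, 5]]
```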

Contributor

I agree with Josh; let's delete this for now. We can open a separate JIRA about making it public and maybe discuss it there.

Contributor

BTW, I do like a slice-based API in general; that might be what we propose publicly.

"""
Return an array that contains all of the elements in a specific
Return a list of list that contains all of the elements in a specific
partition of this RDD.

>>> rdd = sc.parallelize(range(8), 4)
>>> rdd.collectPartitions([1, 3])
[[2, 3], [6, 7]]
"""
raise NotImplementedError

return [self.ctx.runJob(self, lambda it: it, [p], True)
for p in partitions]

def reduce(self, f):
"""