Merge branch 'master' into lookup

Conflicts: python/pyspark/rdd.py
apache · davies · Aug 22, 2014 · Aug 23, 2014 · Aug 23, 2014 · Aug 24, 2014
commit 2871b802a9a7145af1ae93594fbf6b01bd2bb1b7
diff --git a/python/pyspark/rdd.py b/python/pyspark/rdd.py
@@ -131,6 +131,22 @@ def __exit__(self, type, value, tb):
             self._context._jsc.setCallSite(None)
 
 
+class BoundedFloat(float):
+    """
+    A bounded value generated by an approximate job, carrying its confidence
+    level together with its low and high bounds.
+
+    >>> BoundedFloat(100.0, 0.95, 95.0, 105.0)
+    100.0
+    """
+    def __new__(cls, mean, confidence, low, high):
+        obj = float.__new__(cls, mean)
+        obj.confidence = confidence
+        obj.low = low
+        obj.high = high
+        return obj
+
+
 def _parse_memory(s):
     """
     Parse a memory string in the format supported by Java (e.g. 1g, 200m) and
@@ -888,10 +904,10 @@ def top(self, num, key=None):
         [4, 3, 2]
         """
         def topIterator(iterator):
-            return [heapq.nlargest(num, iterator)]
+            yield heapq.nlargest(num, iterator, key=key)
 
         def merge(a, b):
-            return heapq.nlargest(num, a + b)
+            return heapq.nlargest(num, a + b, key=key)
 
         return self.mapPartitions(topIterator).reduce(merge)