Skip to content
Closed
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Merge branch 'master' into lookup
Conflicts:
	python/pyspark/rdd.py
  • Loading branch information
davies committed Aug 24, 2014
commit 2871b802a9a7145af1ae93594fbf6b01bd2bb1b7
20 changes: 18 additions & 2 deletions python/pyspark/rdd.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,22 @@ def __exit__(self, type, value, tb):
self._context._jsc.setCallSite(None)


class BoundedFloat(float):
    """
    Float generated by an approximate job, annotated with the confidence
    of the estimate and its low and high bounds.

    The instance itself behaves as the plain float ``mean``; the extra
    values are stored as the ``confidence``, ``low`` and ``high``
    attributes.

    >>> BoundedFloat(100.0, 0.95, 95.0, 105.0)
    100.0
    """
    def __new__(cls, mean, confidence, low, high):
        # float is immutable, so the numeric value has to be fixed in
        # __new__; the bounds are attached as ordinary instance attributes.
        obj = float.__new__(cls, mean)
        obj.confidence = confidence
        obj.low = low
        obj.high = high
        return obj

    def __getnewargs__(self):
        # The inherited float.__getnewargs__ returns only the numeric value,
        # so copy/deepcopy and pickle (protocol 2+) would call __new__ with a
        # single argument and raise TypeError. Return every argument that
        # __new__ requires so the object can be reconstructed.
        return (float(self), self.confidence, self.low, self.high)


def _parse_memory(s):
"""
Parse a memory string in the format supported by Java (e.g. 1g, 200m) and
Expand Down Expand Up @@ -888,10 +904,10 @@ def top(self, num, key=None):
[4, 3, 2]
"""
def topIterator(iterator):
return [heapq.nlargest(num, iterator)]
yield heapq.nlargest(num, iterator, key=key)

def merge(a, b):
return heapq.nlargest(num, a + b)
return heapq.nlargest(num, a + b, key=key)

return self.mapPartitions(topIterator).reduce(merge)

Expand Down
You are viewing a condensed version of this merge commit. You can view the full changes here.