Skip to content
Prev Previous commit
Next Next commit
Namedtuple for python 2.5
  • Loading branch information
Mark Costello committed Mar 22, 2013
commit 2cee541cc1e7e5a14cd3b12ee64760260bb86d86
120 changes: 62 additions & 58 deletions rdbtools/memprofiler.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
from collections import namedtuple
try:
from collections import namedtuple
except ImportError:
from rdbtools.utils import namedtuple

import random
import json

Expand All @@ -21,13 +25,13 @@ def next_record(self, record):
self.add_aggregate('database_memory', record.database, record.bytes)
self.add_aggregate('type_memory', record.type, record.bytes)
self.add_aggregate('encoding_memory', record.encoding, record.bytes)

self.add_aggregate('type_count', record.type, 1)
self.add_aggregate('encoding_count', record.encoding, 1)

self.add_histogram(record.type + "_length", record.size)
self.add_histogram(record.type + "_memory", (record.bytes/10) * 10)

if record.type == 'list':
self.add_scatter('list_memory_by_length', record.bytes, record.size)
elif record.type == 'hash':
Expand All @@ -44,12 +48,12 @@ def next_record(self, record):
def add_aggregate(self, heading, subheading, metric):
if not heading in self.aggregates :
self.aggregates[heading] = {}

if not subheading in self.aggregates[heading]:
self.aggregates[heading][subheading] = 0

self.aggregates[heading][subheading] += metric

def add_histogram(self, heading, metric):
if not heading in self.histograms:
self.histograms[heading] = {}
Expand All @@ -58,25 +62,25 @@ def add_histogram(self, heading, metric):
self.histograms[heading][metric] = 1
else :
self.histograms[heading][metric] += 1

def add_scatter(self, heading, x, y):
if not heading in self.scatters:
self.scatters[heading] = []
self.scatters[heading].append([x, y])

def get_json(self):
return json.dumps({"aggregates":self.aggregates, "scatters":self.scatters, "histograms":self.histograms})

class PrintAllKeys():
def __init__(self, out):
self._out = out
self._out.write("%s,%s,%s,%s,%s,%s,%s\n" % ("database", "type", "key",
self._out.write("%s,%s,%s,%s,%s,%s,%s\n" % ("database", "type", "key",
"size_in_bytes", "encoding", "num_elements", "len_largest_element"))

def next_record(self, record) :
self._out.write("%d,%s,%s,%d,%s,%d,%d\n" % (record.database, record.type, encode_key(record.key),
self._out.write("%d,%s,%s,%d,%s,%d,%d\n" % (record.database, record.type, encode_key(record.key),
record.bytes, record.encoding, record.size, record.len_largest_element))

class MemoryCallback(RdbCallback):
'''Calculates the memory used if this rdb file were loaded into RAM
The memory usage is approximate, and based on heuristics.
Expand All @@ -88,12 +92,12 @@ def __init__(self, stream, architecture):
self._current_encoding = None
self._current_length = 0
self._len_largest_element = 0

if architecture == 64 or architecture == '64':
self._pointer_size = 8
elif architecture == 32 or architecture == '32':
self._pointer_size = 4

def start_rdb(self):
pass

Expand All @@ -102,138 +106,138 @@ def start_database(self, db_number):

def end_database(self, db_number):
pass

def end_rdb(self):
pass

def set(self, key, value, expiry, info):
self._current_encoding = info['encoding']
size = self.sizeof_string(key) + self.sizeof_string(value) + self.top_level_object_overhead()
size += 2*self.robj_overhead()
size += self.key_expiry_overhead(expiry)

length = element_length(value)
record = MemoryRecord(self._dbnum, "string", key, size, self._current_encoding, length, length)
self._stream.next_record(record)
self.end_key()

def start_hash(self, key, length, expiry, info):
self._current_encoding = info['encoding']
self._current_length = length
self._current_length = length
size = self.sizeof_string(key)
size += 2*self.robj_overhead()
size += self.top_level_object_overhead()
size += self.key_expiry_overhead(expiry)

if 'sizeof_value' in info:
size += info['sizeof_value']
elif 'encoding' in info and info['encoding'] == 'hashtable':
size += self.hashtable_overhead(length)
else:
raise Exception('start_hash', 'Could not find encoding or sizeof_value in info object %s' % info)
self._current_size = size

def hset(self, key, field, value):
if(element_length(field) > self._len_largest_element) :
self._len_largest_element = element_length(field)
if(element_length(value) > self._len_largest_element) :
self._len_largest_element = element_length(value)

if self._current_encoding == 'hashtable':
self._current_size += self.sizeof_string(field)
self._current_size += self.sizeof_string(value)
self._current_size += self.hashtable_entry_overhead()
self._current_size += 2*self.robj_overhead()

def end_hash(self, key):
record = MemoryRecord(self._dbnum, "hash", key, self._current_size, self._current_encoding, self._current_length, self._len_largest_element)
self._stream.next_record(record)
self.end_key()

def start_set(self, key, cardinality, expiry, info):
# A set is exactly like a hashmap
self.start_hash(key, cardinality, expiry, info)

def sadd(self, key, member):
if(element_length(member) > self._len_largest_element) :
self._len_largest_element = element_length(member)

if self._current_encoding == 'hashtable':
self._current_size += self.sizeof_string(member)
self._current_size += self.hashtable_entry_overhead()
self._current_size += self.robj_overhead()

def end_set(self, key):
record = MemoryRecord(self._dbnum, "set", key, self._current_size, self._current_encoding, self._current_length, self._len_largest_element)
self._stream.next_record(record)
self.end_key()

def start_list(self, key, length, expiry, info):
self._current_length = length
self._current_encoding = info['encoding']
size = self.sizeof_string(key)
size += 2*self.robj_overhead()
size += self.top_level_object_overhead()
size += self.key_expiry_overhead(expiry)

if 'sizeof_value' in info:
size += info['sizeof_value']
elif 'encoding' in info and info['encoding'] == 'linkedlist':
size += self.linkedlist_overhead()
else:
raise Exception('start_list', 'Could not find encoding or sizeof_value in info object %s' % info)
self._current_size = size

def rpush(self, key, value) :
if(element_length(value) > self._len_largest_element) :
self._len_largest_element = element_length(value)

if self._current_encoding == 'linkedlist':
self._current_size += self.sizeof_string(value)
self._current_size += self.linkedlist_entry_overhead()
self._current_size += self.robj_overhead()

def end_list(self, key):
record = MemoryRecord(self._dbnum, "list", key, self._current_size, self._current_encoding, self._current_length, self._len_largest_element)
self._stream.next_record(record)
self.end_key()

def start_sorted_set(self, key, length, expiry, info):
self._current_length = length
self._current_encoding = info['encoding']
size = self.sizeof_string(key)
size += 2*self.robj_overhead()
size += self.top_level_object_overhead()
size += self.key_expiry_overhead(expiry)

if 'sizeof_value' in info:
size += info['sizeof_value']
elif 'encoding' in info and info['encoding'] == 'skiplist':
size += self.skiplist_overhead(length)
else:
raise Exception('start_sorted_set', 'Could not find encoding or sizeof_value in info object %s' % info)
self._current_size = size

def zadd(self, key, score, member):
if(element_length(member) > self._len_largest_element):
self._len_largest_element = element_length(member)

if self._current_encoding == 'skiplist':
self._current_size += 8 # self.sizeof_string(score)
self._current_size += self.sizeof_string(member)
self._current_size += 2*self.robj_overhead()
self._current_size += self.skiplist_entry_overhead()

def end_sorted_set(self, key):
record = MemoryRecord(self._dbnum, "sortedset", key, self._current_size, self._current_encoding, self._current_length, self._len_largest_element)
self._stream.next_record(record)
self.end_key()

def end_key(self):
self._current_encoding = None
self._current_size = 0
self._len_largest_element = 0

def sizeof_string(self, string):
# See struct sdshdr over here https://github.com/antirez/redis/blob/unstable/src/sds.h
# int len : 4 bytes
Expand All @@ -253,7 +257,7 @@ def sizeof_string(self, string):
return len(string) + 8 + 1 + self.malloc_overhead()

def top_level_object_overhead(self):
# Each top level object is an entry in a dictionary, and so we have to include
# Each top level object is an entry in a dictionary, and so we have to include
# the overhead of a dictionary entry
return self.hashtable_entry_overhead()

Expand All @@ -264,70 +268,70 @@ def key_expiry_overhead(self, expiry):
# Key expiry is stored in a hashtable, so we have to pay for the cost of a hashtable entry
# The timestamp itself is stored as an int64, which is a 8 bytes
return self.hashtable_entry_overhead() + 8

def hashtable_overhead(self, size):
# See https://github.com/antirez/redis/blob/unstable/src/dict.h
# See the structures dict and dictht
# 2 * (3 unsigned longs + 1 pointer) + 2 ints + 2 pointers
# = 56 + 4 * sizeof_pointer()
#
#
# Additionally, see **table in dictht
# The length of the table is the next power of 2
# When the hashtable is rehashing, another instance of **table is created
# We are assuming 0.5 percent probability of rehashing, and so multiply
# We are assuming 0.5 percent probability of rehashing, and so multiply
# the size of **table by 1.5
return 56 + 4*self.sizeof_pointer() + self.next_power(size)*self.sizeof_pointer()*1.5

def hashtable_entry_overhead(self):
# See https://github.com/antirez/redis/blob/unstable/src/dict.h
# Each dictEntry has 3 pointers
# Each dictEntry has 3 pointers
return 3*self.sizeof_pointer()

def linkedlist_overhead(self):
# See https://github.com/antirez/redis/blob/unstable/src/adlist.h
# A list has 5 pointers + an unsigned long
return 8 + 5*self.sizeof_pointer()

def linkedlist_entry_overhead(self):
# See https://github.com/antirez/redis/blob/unstable/src/adlist.h
# A node has 3 pointers
return 3*self.sizeof_pointer()

def skiplist_overhead(self, size):
return 2*self.sizeof_pointer() + self.hashtable_overhead(size) + (2*self.sizeof_pointer() + 16)

def skiplist_entry_overhead(self):
return self.hashtable_entry_overhead() + 2*self.sizeof_pointer() + 8 + (self.sizeof_pointer() + 8) * self.zset_random_level()

def robj_overhead(self):
return self.sizeof_pointer() + 8

def malloc_overhead(self):
return self.size_t()

def size_t(self):
return self.sizeof_pointer()

def sizeof_pointer(self):
return self._pointer_size

def next_power(self, size):
power = 1
while (power <= size) :
power = power << 1
return power

def zset_random_level(self):
level = 1
rint = random.randint(0, 0xFFFF)
while (rint < ZSKIPLIST_P * 0xFFFF):
level += 1
rint = random.randint(0, 0xFFFF)
rint = random.randint(0, 0xFFFF)
if level < ZSKIPLIST_MAXLEVEL :
return level
else:
return ZSKIPLIST_MAXLEVEL


def element_length(element):
if isinstance(element, int):
Expand All @@ -336,4 +340,4 @@ def element_length(element):
return 16
else:
return len(element)

Empty file added rdbtools/utils/__init__.py
Empty file.
Loading